\usepackage{booktabs} \usepackage{longtable} \usepackage{array} \usepackage{multirow} \usepackage{wrapfig} \usepackage{float} \usepackage{colortbl} \usepackage{pdflscape} \usepackage{tabu} \usepackage{threeparttable} \usepackage{threeparttablex} \usepackage[normalem]{ulem} \usepackage{makecell} \usepackage{xcolor}

NHANES Mortality Analysis

Code
# load packages 
library(tidyverse)
library(gtsummary)
library(gt)
library(tidymodels)
library(censored)
library(paletteer)
library(survey)
library(patchwork)
# paletteer_d("colorBlindness::PairedColor12Steps")
col1 = "#FF7F00FF"; col2 = "#19B2FFFF"

# load data 
# results from univariate
wt_single_male = readRDS(here::here("results", "metrics_wtd_100_singlevar_male.rds"))
wt_single_female = readRDS(here::here("results", "metrics_wtd_100_singlevar_female.rds"))
wt_single = readRDS(here::here("results", "metrics_wtd_100_singlevar.rds"))
# results from multivariable
wt_male = readRDS(here::here("results", "metrics_wtd_100_male.rds"))
wt_female = readRDS(here::here("results", "metrics_wtd_100_female.rds"))
wt_all = readRDS(here::here("results", "metrics_wtd_100.rds"))

# covariate/pa df 
df_all = readRDS(here::here("data", "covariates_accel_mortality_df.rds")) 

Loading and preparing data

See code_README.md for description of the pipeline to process the data. For this analysis, we have one estimate for each physical activity variable, along with demographic variables for all participants who received an accelerometer. We create a separate dataset for the mortality analysis restricted to just individuals with valid accelerometry data and who are between 50 and 79 years old, and who are not missing any covariate data that will be used in the models. Finally, we winsorize the PA variables at the 99th percentile, to remove any extremely high outliers.

Code
df_mortality =
  df_all %>%
  filter(num_valid_days >= 3) %>%
  filter(age_in_years_at_screening >= 50 &
           age_in_years_at_screening < 80) 

df_accel = 
  df_all %>% 
  filter(num_valid_days >= 3 & age_in_years_at_screening >= 18)

df_mortality_win =
  df_mortality %>%
  filter(if_all(.cols = c(age_in_years_at_screening, gender,
                          race_hispanic_origin, cat_education,
                          cat_bmi, chd, chf, heartattack, stroke, cancer,
                          bin_diabetes, cat_alcohol, cat_smoke, bin_mobilityproblem,
                          general_health_condition, mortstat, permth_exm, total_PAXMTSM),
                ~!is.na(.x))) %>%
  mutate(event_time = permth_exm / 12) %>% 
  ungroup() %>%
  mutate(across(c(contains("total"), contains("peak")), ~DescTools::Winsorize(.x, probs = c(0, 0.99)))) 

df_accel_win =
  df_accel %>%
  mutate(across(c(contains("total"), contains("peak")), ~DescTools::Winsorize(.x, probs = c(0, 0.99)))) 
Code
# TO DO: check these models 
# sample sizes for exclusion diagram

df_all %>% 
  count(data_release_cycle)

df_all %>% 
  group_by(data_release_cycle) %>% 
  count(has_accel)

acc_subset = df_all %>% 
  filter(has_accel)

acc_subset %>% 
  group_by(data_release_cycle) %>% 
  count()

acc_subset %>% 
  group_by(data_release_cycle) %>% 
  count(valid_accel)

acc_subset %>% 
  group_by(data_release_cycle) %>% 
  count(inclusion_type)

acc_subset %>% 
  filter(valid_accel) %>% 
  mutate(adult = age_in_years_at_screening >= 18) %>% 
  group_by(data_release_cycle) %>% 
  count(adult)

acc_subset %>% 
  filter(valid_accel) %>% 
  mutate(b50 = age_in_years_at_screening < 50,
         o80 = age_in_years_at_screening >= 80) %>% 
  group_by(data_release_cycle) %>% 
  count(b50, o80)

acc_subset %>% 
  filter(num_valid_days >= 3 & age_in_years_at_screening >= 50 & age_in_years_at_screening < 80) %>% 
  filter(!(if_all(.cols = c(age_in_years_at_screening, gender,
                          race_hispanic_origin, cat_education,
                          cat_bmi, chd, chf, heartattack, stroke, cancer,
                          bin_diabetes, cat_alcohol, cat_smoke, bin_mobilityproblem,
                          general_health_condition, mortstat, permth_exm),
                ~!is.na(.x)))) %>% 
  group_by(data_release_cycle) %>%
  count()

4351+269+34
6497-(4351+269+34)
3913+264+48

6020-4225

Figures

Distribution of PA Variables

Probably supplemental

Code
df_accel %>%
  select(contains("steps") & contains("total"), SEQN, gender) %>% 
  pivot_longer(cols = -c(SEQN, gender)) %>% 
  mutate(name = factor(name, labels = c("Actilife", "ADEPT", "Oak", "Sc. RF", "Sc. SSL", "Verisense", "Verisense rev."))) %>% 
  ggplot(aes(x = value / 1000, fill = gender, col = gender))+
  geom_density() + 
  facet_grid(gender~name)+
  theme_bw() + 
  scale_fill_manual(values = c(col1, col2))+
  scale_color_manual(values = c(col1, col2))+
  theme(legend.position = "none",
        panel.grid.minor.x = element_blank(),
        axis.title = element_text(size = 14)) + 
  labs(x = "Mean Daily Steps x 1000", y = "Density", title = "Distribution of Mean Step Counts",
       subtitle = "All Individuals aged 18+ with Valid Accelerometry Data")

Code
df_accel_win %>%
  select(contains("steps") & contains("total"), SEQN, gender) %>% 
  pivot_longer(cols = -c(SEQN, gender)) %>% 
  mutate(name = factor(name, labels = c("Actilife", "ADEPT", "Oak", "Sc. RF", "Sc. SSL", "Verisense", "Verisense rev."))) %>% 
  ggplot(aes(x = value / 1000, fill = gender, col = gender))+
  geom_density() + 
  facet_grid(gender~name)+
  theme_bw() + 
  scale_fill_manual(values = c(col1, col2))+
  scale_color_manual(values = c(col1, col2))+
  theme(legend.position = "none",
        panel.grid.minor.x = element_blank(),
        axis.title = element_text(size = 14)) + 
  labs(x = "Mean Daily Steps x 1000", y = "Density", title = "Distribution of Winsorized Mean Step Counts",
       subtitle = "All Individuals aged 18+ with Valid Accelerometry Data")

Code
df_accel %>%
  select(!contains("steps") & contains("total"), SEQN, gender) %>% 
  pivot_longer(cols = -c(SEQN, gender)) %>% 
  mutate(name = factor(name, labels = c("AC", "log10 AC", "log10 MIMS", "MIMS"))) %>%
  ggplot(aes(x = value, fill = gender, col = gender))+
  geom_density() + 
  facet_wrap(gender~name, scales = "free")+
  theme_bw() + 
  scale_fill_manual(values = c(col1, col2))+
  scale_color_manual(values = c(col1, col2))+
  theme(legend.position = "none",
        panel.grid.minor.x = element_blank(),
        axis.title = element_text(size = 14)) + 
  labs(x = "Mean Daily Value", y = "Density", title = "Distribution of Mean PA Variables",
       subtitle = "All Individuals aged 18+ with Valid Accelerometry Data")

Code
df_accel_win %>%
  select(!contains("steps") & contains("total"), SEQN, gender) %>% 
  pivot_longer(cols = -c(SEQN, gender)) %>% 
  mutate(name = factor(name, labels = c("AC", "log10 AC", "log10 MIMS", "MIMS"))) %>%
  ggplot(aes(x = value, fill = gender, col = gender))+
  geom_density() + 
  facet_wrap(gender~name, scales = "free")+
  theme_bw() + 
  scale_fill_manual(values = c(col1, col2))+
  scale_color_manual(values = c(col1, col2))+
  theme(legend.position = "none",
        panel.grid.minor.x = element_blank(),
        axis.title = element_text(size = 14)) + 
  labs(x = "Mean Daily Value", y = "Density", title = "Distribution of Winsorized Mean PA Variables",
       subtitle = "All Individuals aged 18+ with Valid Accelerometry Data")

Code
df_accel %>%
  select(contains("steps") & contains("peak1"), SEQN, gender) %>% 
  pivot_longer(cols = -c(SEQN, gender)) %>% 
  mutate(name = factor(name, labels = c("Actilife", "ADEPT", "Oak", "Sc. RF", "Sc. SSL", "Verisense", "Verisense rev."))) %>%
  ggplot(aes(x = value, fill = gender, col = gender))+
  geom_density() + 
  facet_grid(gender~name)+
  theme_bw() + 
  scale_fill_manual(values = c(col1, col2))+
  scale_color_manual(values = c(col1, col2))+
  theme(legend.position = "none",
        panel.grid.minor.x = element_blank(),
        axis.title = element_text(size = 14)) + 
  labs(x = "Mean Peak 1 Min Cadence (steps/min)", y = "Density", title = "Distribution of Peak 1 Min Cadence",
       subtitle = "All Individuals aged 18+ with Valid Accelerometry Data")

Code
df_accel_win %>%
  select(contains("steps") & contains("peak1"), SEQN, gender) %>% 
  pivot_longer(cols = -c(SEQN, gender)) %>% 
  mutate(name = factor(name, labels = c("Actilife", "ADEPT", "Oak", "Sc. RF", "Sc. SSL", "Verisense", "Verisense rev."))) %>%
  ggplot(aes(x = value, fill = gender, col = gender))+
  geom_density() + 
  facet_grid(gender~name)+
  theme_bw() + 
  scale_fill_manual(values = c(col1, col2))+
  scale_color_manual(values = c(col1, col2))+
  theme(legend.position = "none",
        panel.grid.minor.x = element_blank(),
        axis.title = element_text(size = 14)) + 
  labs(x = "Mean Peak 1 Min Cadence (steps/min)", y = "Density", title = "Distribution of Winsorized Peak 1 Min Cadence",
       subtitle = "All Individuals aged 18+ with Valid Accelerometry Data")

Code
df_accel %>%
  select(contains("steps") & contains("peak30"), SEQN, gender) %>% 
  pivot_longer(cols = -c(SEQN, gender)) %>% 
  mutate(name = factor(name, labels = c("Actilife", "ADEPT", "Oak", "Sc. RF", "Sc. SSL", "Verisense", "Verisense rev."))) %>%
  ggplot(aes(x = value, fill = gender, col = gender))+
  geom_density() + 
  facet_grid(gender~name)+
  theme_bw() + 
  scale_fill_manual(values = c(col1, col2))+
  scale_color_manual(values = c(col1, col2))+
  theme(legend.position = "none",
        panel.grid.minor.x = element_blank(),
        axis.title = element_text(size = 14)) + 
  labs(x = "Mean Peak 30 Min Cadence (steps/min)", y = "Density", title = "Distribution of Peak 30 Min Cadence",
       subtitle = "All Individuals aged 18+ with Valid Accelerometry Data")

Code
df_accel_win %>%
  select(contains("steps") & contains("peak30"), SEQN, gender) %>% 
  pivot_longer(cols = -c(SEQN, gender)) %>% 
  mutate(name = factor(name, labels = c("Actilife", "ADEPT", "Oak", "Sc. RF", "Sc. SSL", "Verisense", "Verisense rev."))) %>%
  ggplot(aes(x = value, fill = gender, col = gender))+
  geom_density() + 
  facet_wrap(gender~name, scales = "free")+
  theme_bw() + 
  scale_fill_manual(values = c(col1, col2))+
  scale_color_manual(values = c(col1, col2))+
  theme(legend.position = "none",
        panel.grid.minor.x = element_blank(),
        axis.title = element_text(size = 14)) + 
  labs(x = "Mean Peak 30 Min Cadence (steps/min)", y = "Density", title = "Distribution of Winsorized Peak 30 Min Cadence",
       subtitle = "All Individuals aged 18+ with Valid Accelerometry Data")

Code
df_accel %>%
  select(!contains("steps") & contains("peak1"), SEQN, gender) %>% 
  pivot_longer(cols = -c(SEQN, gender)) %>% 
  mutate(name = factor(name, labels = c("AC", "log10 AC", "log10 MIMS", "MIMS"))) %>%
  ggplot(aes(x = value, fill = gender, col = gender))+
  geom_density() + 
  facet_wrap(gender~name, scales = "free")+
  theme_bw() + 
  scale_fill_manual(values = c(col1, col2))+
  scale_color_manual(values = c(col1, col2))+
  theme(legend.position = "none",
        panel.grid.minor.x = element_blank(),
        axis.title = element_text(size = 14)) + 
  labs(x = "Mean Peak Value", y = "Density", title = "Distribution of Peak 1 Min Variables",
       subtitle = "All Individuals aged 18+ with Valid Accelerometry Data")

Code
df_accel_win %>%
  select(!contains("steps") & contains("peak1"), SEQN, gender) %>% 
  pivot_longer(cols = -c(SEQN, gender)) %>% 
  mutate(name = factor(name, labels = c("AC", "log10 AC", "log10 MIMS", "MIMS"))) %>%
  ggplot(aes(x = value, fill = gender, col = gender))+
  geom_density() + 
  facet_wrap(gender~name, scales = "free")+
  theme_bw() + 
  scale_fill_manual(values = c(col1, col2))+
  scale_color_manual(values = c(col1, col2))+
  theme(legend.position = "none",
        panel.grid.minor.x = element_blank(),
        axis.title = element_text(size = 14)) + 
  labs(x = "Mean Peak Value", y = "Density", title = "Distribution of Winsorized Peak 1 Min Variables",
       subtitle = "All Individuals aged 18+ with Valid Accelerometry Data")

Code
df_accel %>%
  select(!contains("steps") & contains("peak30"), SEQN, gender) %>% 
  pivot_longer(cols = -c(SEQN, gender)) %>% 
  mutate(name = factor(name, labels = c("AC", "log10 AC", "log10 MIMS", "MIMS"))) %>%
  ggplot(aes(x = value, fill = gender, col = gender))+
  geom_density() + 
  facet_wrap(gender~name, scales = "free")+
  theme_bw() + 
  scale_fill_manual(values = c(col1, col2))+
  scale_color_manual(values = c(col1, col2))+
  theme(legend.position = "none",
        panel.grid.minor.x = element_blank(),
        axis.title = element_text(size = 14)) + 
  labs(x = "Mean Peak Value", y = "Density", title = "Distribution of Peak 30 Min Variables",
       subtitle = "All Individuals aged 18+ with Valid Accelerometry Data")

Code
df_accel_win %>%
  select(!contains("steps") & contains("peak30"), SEQN, gender) %>% 
  pivot_longer(cols = -c(SEQN, gender)) %>% 
  mutate(name = factor(name, labels = c("AC", "log10 AC", "log10 MIMS", "MIMS"))) %>%
  ggplot(aes(x = value, fill = gender, col = gender))+
  geom_density() + 
  facet_wrap(gender~name, scales = "free")+
  theme_bw() + 
  scale_fill_manual(values = c(col1, col2))+
  scale_color_manual(values = c(col1, col2))+
  theme(legend.position = "none",
        panel.grid.minor.x = element_blank(),
        axis.title = element_text(size = 14)) + 
  labs(x = "Mean Peak Value", y = "Density", title = "Distribution of Winsorized Peak 30 Min Variables",
       subtitle = "All Individuals aged 18+ with Valid Accelerometry Data")

Mean Step Counts and Peak Cadence by Age and Sex

Tent. Figure 1

Code
df_means = df_accel %>%
  filter(age_in_years_at_screening >= 18 & age_in_years_at_screening < 80) %>% 
  mutate(weight = full_sample_2_year_mec_exam_weight / 2,
         weight_norm = weight / mean(weight)) %>% 
  ungroup() 

# survey_design <- svydesign(ids = ~1, weights = ~weight, data = df)
survey_design = survey::svydesign(
  id = ~ masked_variance_pseudo_psu,
  strata = ~ masked_variance_pseudo_stratum,
  weights = ~ weight_norm,
  data = df_means,
  nest = TRUE
)

# Calculate mean estimate by age
calc_by_age =
  function(var, df) {
    # var = "total_oaksteps"
    formula = as.formula(paste("~", var))
    total_by_age_gender = svyby(formula,
                               ~ age_in_years_at_screening,
                               survey_design,
                               svymean) %>%
      rename(mean = contains(var)) %>%
      mutate(metric = var)
  }

means_df = 
  map_dfr(.x = df_means %>% select(contains("total") | contains("peak")) %>% colnames(),
          .f = calc_by_age, df = df)

models = means_df %>%
   mutate(lb = mean - 1.96 * se,
         ub = mean + 1.96 * se) %>% 
        tidyr::nest(data = -c(metric)) %>%
        dplyr::mutate(
                # Perform loess calculation on each group
                m = purrr::map(data, loess,
                               formula = mean ~ age_in_years_at_screening, span = .75),
                # Retrieve the fitted values from each model
                fitted_mean = purrr::map(m, `[[`, "fitted"),
                l = purrr::map(data, loess,
                               formula = lb ~ age_in_years_at_screening, span = .75),
                # Retrieve the fitted values from each model
                fitted_lb = purrr::map(l, `[[`, "fitted"),
                u = purrr::map(data, loess,
                               formula = ub ~ age_in_years_at_screening, span = .75),
                # Retrieve the fitted values from each model
                fitted_ub = purrr::map(u, `[[`, "fitted")
        )

# Apply fitted y's as a new column
results = models %>%
        dplyr::select(-m, -l, -u) %>%
        tidyr::unnest(cols = c(data, contains("fitted")))
Code
p1 = results %>% 
  filter(grepl("step", metric) & grepl("total", metric)) %>%
  mutate(across(contains("fitted"), ~.x / 1000),
                metric = factor(metric, levels = c("total_actisteps", 
                                            "total_adeptsteps",
                                            "total_oaksteps",
                                            "total_vssteps",
                                            "total_vsrevsteps",
                                            "total_scsslsteps",
                                            "total_scrfsteps"),
                         labels = c("Actilife", "ADEPT", "Oak", "Verisense", "Verisense rev.", "Stepcount SSL", "Stepcount RF"))) %>% 
  ggplot(aes(x = age_in_years_at_screening, y = fitted_mean,
             ymin = fitted_lb, ymax = fitted_ub, color = metric, fill = metric)) +
  # facet_grid(. ~ metric) +
  geom_line(linewidth = 1) + 
  geom_ribbon(alpha = .2, linetype = 0) +
    scale_color_paletteer_d("ggthemes::few_Dark")+
  scale_fill_paletteer_d("ggthemes::few_Dark")+

  # scale_color_brewer(palette= "Dark2") + 
  # scale_fill_brewer(palette = "Dark2") + 
  # scale_color_manual(values = c(col1, col2), name = "")+
  # scale_fill_manual(values = c(col1, col2), name = "")+
  theme_light() +
  theme(legend.position = "bottom",
        legend.title = element_blank()) +
  labs(x = "Age (yrs)", y = "Mean Daily Steps x 1000",
       title = "Smoothed Survey Weighted Mean Daily Steps by Age ")+
  scale_y_continuous(breaks=seq(0,16,2))+
    scale_x_continuous(breaks=seq(20,80,10))

p1

Code
p2 = results %>% 
  filter(grepl("peak", metric) & grepl("step", metric)) %>% 
  mutate(type = sub("_.*", "", metric),
         method = sub("^[^_]*_", "", metric),
         type = factor(type, labels = c("Peak 1-minute", "Peak 30-minute"))) %>% 
  filter(grepl("step", metric)) %>%
  mutate(method = factor(method, levels = c("actisteps", 
                                            "adeptsteps",
                                            "oaksteps",
                                            "vssteps",
                                            "vsrevsteps",
                                            "scsslsteps",
                                            "scrfsteps"),
                         labels = c("Actilife", "ADEPT", "Oak", "Verisense", "Verisense revised", "Stepcount SSL", "Stepcount RF"))) %>% 
  ggplot(aes(x = age_in_years_at_screening, y = fitted_mean,
             ymin = fitted_lb, ymax = fitted_ub, fill = method, color = method)) +
  facet_grid(. ~ type) +
  geom_line(aes(linetype = type)) + 
  geom_ribbon(alpha = .2, aes(linetype = type), color = NA) +
    scale_color_paletteer_d("ggthemes::few_Dark")+
    scale_fill_paletteer_d("ggthemes::few_Dark")+
  # scale_color_manual(values = c(col1, col2), name = "")+
  # scale_fill_manual(values = c(col1, col2), name = "")+
  theme_light() +
  scale_linetype_discrete(labels = c("1 Minute", "30 Minute"), guide = F) + 
  theme(legend.position = "bottom",
        legend.title = element_blank(),
        strip.text =  element_text(size = 12)) +
  labs(x = "Age (yrs)", y = "Peak Cadence (steps/min)",
       title = "Smoothed Survey Weighted Peak Cadence by Age")+
    scale_y_continuous(breaks=seq(20, 120,15))+
  scale_x_continuous(breaks=seq(20,80,10))
p2

Code
p3 = means_df %>% 
  filter(grepl("step", metric) & grepl("total", metric)) %>% 
  mutate(metric = factor(metric, levels = c("total_actisteps", 
                                            "total_adeptsteps",
                                            "total_oaksteps",
                                            "total_vssteps",
                                            "total_vsrevsteps",
                                            "total_scsslsteps",
                                            "total_scrfsteps"),
                         labels = c("Actilife", "ADEPT", "Oak", "Verisense", "Verisense rev.", "Stepcount SSL", "Stepcount RF"))) %>% 
    group_by(metric) %>% 
  mutate(pct_chg = (mean - lag(mean))/lag(mean)*100) %>% 
  ggplot(aes(x = age_in_years_at_screening, y = pct_chg, color = metric))+
  geom_smooth(method = "loess", se = FALSE) + 
  scale_color_paletteer_d("ggthemes::few_Dark", name = "")+
  # scale_color_brewer(palette = "Dark2", name = "") + 
  geom_hline(aes(yintercept = 0), col = "darkgrey", linetype = "dashed") + 
  theme_light() + 
  labs(x = "Age (yrs)", y = "Percent Change in Mean Daily Steps",
       title = "% Change in Mean Daily Steps by Age")+
  theme(legend.position = c(.1, .4))+
    scale_x_continuous(breaks=seq(20,80,10))

p3 

Code
means_df %>% 
  filter(grepl("total", metric) & grepl("step", metric)) %>% 
  mutate(metric = factor(metric, levels = c("total_actisteps",
                                            "total_adeptsteps",
                                            "total_oaksteps",
                                            "total_vssteps",
                                            "total_vsrevsteps",
                                            "total_scsslsteps",
                                            "total_scrfsteps"),
                         labels = c("Actilife", "ADEPT", "Oak", "Verisense", "Verisense rev.", "Stepcount SSL", "Stepcount RF"))) %>%
    group_by(metric) %>% 
  mutate(pct_chg = (mean - lag(mean))/lag(mean)*100) %>% 
  ggplot(aes(x = age_in_years_at_screening, y = pct_chg, color = metric))+
  geom_smooth(method = "loess", se = FALSE) + 
  scale_color_brewer(palette = "Dark2", name = "") + 
  geom_hline(aes(yintercept = 0), col = "darkgrey", linetype = "dashed") + 
  theme_light() + 
  labs(x = "Age (yrs)", y = "Percent Change in Mean Daily Steps",
       title = "Percent Change in Mean Daily Steps by Age")+
  theme(legend.position = c(.1, .4),
        legend.title = element_blank())

Code
means_df %>% 
  filter(grepl("total", metric) & grepl("step", metric)) %>% 
  mutate(metric = factor(metric, levels = c("total_actisteps",
                                            "total_adeptsteps",
                                            "total_oaksteps",
                                            "total_vssteps",
                                            "total_vsrevsteps",
                                            "total_scsslsteps",
                                            "total_scrfsteps"),
                         labels = c("Actilife", "ADEPT", "Oak", "Verisense", "Verisense rev.", "Stepcount SSL", "Stepcount RF"))) %>%
    group_by(metric) %>% 
  mutate(pct_chg = (mean - lag(mean))/lag(mean)*100,
         chg = mean - lag(mean)) %>% 
  ggplot(aes(x = age_in_years_at_screening, y = chg, color = metric))+
  geom_smooth(method = "loess", se = FALSE) + 
  scale_color_brewer(palette = "Dark2", name = "") + 
  geom_hline(aes(yintercept = 0), col = "darkgrey", linetype = "dashed") + 
  theme_light() + 
  labs(x = "Age (yrs)", y = "Percent Change in Mean Daily Steps",
       title = "Change in Mean Daily Steps by Age")+
  theme(legend.position = c(.1, .27),
        legend.title = element_blank())

Code
p1 / (p2 + p3 ) + plot_layout(guides = "collect", nrow = 2, axis_titles = "collect") & theme(legend.position = 'bottom')

Code
ggsave(here::here("manuscript", "figures", "distributions.png"), dpi = 400, width = 10, height = 8)
Code
# calculate in age grps of 5 
df_means = df_accel %>%
  filter(age_in_years_at_screening >= 18 & age_in_years_at_screening < 80) %>% 
  mutate(weight = full_sample_2_year_mec_exam_weight / 2,
         weight_norm = weight / mean(weight)) %>% 
  ungroup() %>% 
  mutate(age_grp = cut(age_in_years_at_screening, breaks = c(seq(18, 73, 5), 80), include.lowest = TRUE))

# survey_design <- svydesign(ids = ~1, weights = ~weight, data = df)
survey_design = survey::svydesign(
  id = ~ masked_variance_pseudo_psu,
  strata = ~ masked_variance_pseudo_stratum,
  weights = ~ weight_norm,
  data = df_means,
  nest = TRUE
)

# Calculate mean estimate by age
calc_by_age_group =
  function(var, df) {
    # var = "total_oaksteps"
    formula = as.formula(paste("~", var))
    total_by_age_gender = svyby(formula,
                               ~ age_grp,
                               survey_design,
                               svymean) %>%
      rename(mean = contains(var)) %>%
      mutate(metric = var)
  }

cats_df = 
  map_dfr(.x = df_means %>% select(contains("total") | contains("peak")) %>% colnames(),
          .f = calc_by_age_group, df = df)

chg_df = 
  cats_df %>% 
  group_by(metric) %>% 
  mutate(pct_chg = (mean - lag(mean)) / lag(mean) * 100)

chg_df %>% 
  filter(grepl("step", metric) & grepl("total", metric)) %>% 
  ggplot(aes(x = age_grp, y = pct_chg, color = metric, group = metric)) + 
  geom_step()+
  facet_wrap(.~metric, nrow = 1)

means_df %>% 
  filter(grepl("step", metric) & grepl("total", metric)) %>% 
    group_by(metric) %>% 
  mutate(pct_chg = (mean - lag(mean))/lag(mean)*100) %>% 
  ggplot(aes(x = age_in_years_at_screening, y = pct_chg, color = metric))+
  geom_smooth(method = "loess", se = FALSE)
  # facet_wrap(.~metric, nrow  = 1)

Correlations between Step Counts and AC/MIMS

Figure 2?

Code
cor_mat = 
  df_accel %>% select(contains("total")) %>%
  cor(., use = "complete", method = "spearman") 
pvals = df_accel %>% select(contains("total")) %>%
  rstatix::cor_pmat(.) %>% 
  select(-rowname) %>% 
  as.matrix()
colnames(cor_mat) = rownames(cor_mat) =colnames(pvals) = rownames(pvals) = c("AC", "Actilife", "ADEPT", "log10 AC", "log10 MIMS", "Oak", "MIMS", "Sc. RF", "Sc. SSL", "Vs rev.", "Vs")


corrplot::corrplot(cor_mat,  
         method="circle", 
         type="upper", 
         # col=paletteer_d("colorBlindness::Blue2DarkRed12Steps"), 
        col= paletteer_d("colorBlindness::Blue2Orange8Steps"),
         tl.col="black", 
         tl.srt=45, 
         p.mat = pvals,
         col.lim=c(0.2,1),
         sig.level = 0.05, 
         insig = "blank",
         is.corr = FALSE,
        diag = FALSE,
         order = "AOE",
        title = "Spearman Correlations",
        addCoef.col = 'black') 

Single variable mortality prediction

Figure 3?

Code
# make label df 
var_labels = 
  tibble(names = 
           unique(wt_single$variable),
         labels = c("Age at screening", 
                    "BMI Category", "Gender", "Race/ethnicity",
                    "Diabetes", "Education level", 
                    "CHF", "CHD", "Heart attack", 
                    "Stroke", "Cancer", "Alcohol use",
                    "Smoking status", "Mobility problem", 
                    "Self-reported health", "AC",
                     "Actilife steps", "ADEPT steps",
                    "log10 AC", "log10 MIMS", "Oak steps", "MIMS",
                     "Sc. RF steps", "Sc. SSL steps",
                    "Verisense rev. steps", "Verisense steps",
                    "Peak1 AC", 
                     "Peak1 Actilife steps", "Peak1 ADEPT steps",
                    "Peak1 log10 AC", "Peak1 log10 MIMS", "Peak1 Oak steps","Peak1 MIMS",
                     "Peak1 Sc. RF steps", "Peak1 Sc. SSL steps",
                    "Peak1 Verisense rev. steps", "Peak1 Verisense steps",
                     "Peak30 AC", 
                     "Peak30 Actilife steps", "Peak30 ADEPT steps",
                    "Peak30 log10 AC", "Peak30 log10 MIMS", "Peak30 Oak steps","Peak30 MIMS",
                     "Peak30 Sc. RF steps", "Peak30 Sc. SSL steps",
                    "Peak30 Verisense rev. steps", "Peak30 Verisense steps"))

# paletteer_d("ggthemes::colorblind")
wt_single %>% 
  filter(!grepl("peak", variable)) %>% 
  mutate(var_group = case_when(
    grepl("steps", variable) ~ "Step variable",
    grepl("total", variable) ~ "Non-step accelerometry variable",
    TRUE ~ "Non-accelerometry variable"
  )) %>% 
  # mutate(variable = sub(".*mean\\_", "", variable)) %>% 
  group_by(variable, var_group) %>% 
  summarize(mean = mean(concordance),
            sd = sd(concordance),
            se = sd(concordance)/sqrt(n())) %>% 
  mutate(ci_low = mean - 1.96*se,
         ci_high = mean + 1.96*se) %>% 
  ungroup() %>% 
  left_join(var_labels, by = c("variable" = "names")) %>%
  mutate(labels = factor(labels),
         labels = fct_reorder(labels, mean)) %>% 
  ggplot(aes(y = labels, x = mean, xmin = ci_low, xmax = ci_high, color = var_group))+
  geom_point() + 
  geom_errorbarh() + 
  theme_bw() + 
  scale_color_manual(values = c("#009E73FF", "#0072B2FF", "#CC79A7FF"), name = "")+
  scale_x_continuous(limits=c(0.5, 0.75), breaks=seq(0.5, 0.75, .05))+
  theme(legend.position = c(.3, .75),
        legend.title = element_blank(),
        legend.text = element_text(size = 14),
        axis.title = element_text(size = 14),
      axis.text.y = element_text(size = 12),
      strip.text = element_text(size = 14))+
  labs(x = "Mean 100x 10-fold Cross-Validated Concordance", y = "")

Code
# ggsave(here::here("manuscript", "figures", "single_concordance.svg"), width = 8, height = 8)
# ggsave(here::here("manuscript", "figures", "single_concordance.png"), dpi = 400, width = 10, height = 8)

Comparison with cadence estimates

Supplement?

Code
wt_single %>% 
  filter(grepl("peak", variable) | grepl("steps", variable)) %>% 
  mutate(var_group = factor(case_when(
    grepl("peak1", variable) ~ "Peak 1-min variable",
    grepl("peak30", variable) ~ "Peak 30-min variable",
    TRUE ~ "Mean daily total variable"
  ), levels = c("Peak 1-min variable", "Peak 30-min variable", "Mean daily total variable"))) %>% 
  group_by(variable, var_group) %>% 
  summarize(mean = mean(concordance),
            sd = sd(concordance),
            se = sd(concordance)/sqrt(n())) %>% 
  mutate(ci_low = mean - 1.96*se,
         ci_high = mean + 1.96*se) %>% 
  ungroup() %>% 
  left_join(var_labels, by = c("variable" = "names")) %>%
  mutate(labels = factor(labels),
         labels = fct_reorder(labels, mean),
         grp = "Male") %>% 
  ggplot(aes(y = labels, x = mean, xmin = ci_low, xmax = ci_high, color = var_group))+
  geom_point() + 
  geom_errorbarh() + 
  theme_bw() + 
  # facet_wrap(.~grp) +
  scale_color_manual(values = c("#E69F00FF", "#D55E00FF", "#56B4E9FF"), name = "")+
  scale_x_continuous(limits=c(0.687, 0.738), breaks=seq(0.675, 0.775, .0125))+
  theme(legend.position = c(.3, .7),
        legend.title = element_blank(),
        axis.text.y = element_text(size = 12),
        axis.title= element_text(size = 14),
        legend.text = element_text(size = 14),
        strip.text = element_text(size = 14))+
  labs(x = "Mean 100x 10-fold Cross-Validated Concordance", y = "")

Code
# ggsave(here::here("manuscript", "figures", "single_concordance_cadence.png"), width = 8, height = 8,
#        dpi = 350)

Hazard ratio forest plots

Figure 4?

Code
pa_vars = df_mortality_win %>% 
  select(contains("total") & contains("steps")) %>% 
  colnames()

df_mortality_win_scaled = 
  df_mortality_win %>% 
  mutate(across(c(contains("total")), ~scale(.x)))


fit_model = function(var, df){
  
  df = df %>% 
    mutate(weight = full_sample_2_year_mec_exam_weight / 2, weight_norm = weight / mean(weight))
  


  formula = as.formula(paste0("Surv(event_time, mortstat) ~", var, "+ 
      age_in_years_at_screening + cat_education + bin_diabetes + cat_bmi +
      race_hispanic_origin +
      gender + 
      cat_bmi +
      cat_education +
      chf +
      chd +
      heartattack +
      stroke +
      cancer +
      bin_diabetes +
      cat_alcohol +
      cat_smoke +
      bin_mobilityproblem +
      general_health_condition"))
    coxph(formula, data = df, weights = weight_norm) %>% 
      tidy() %>% 
      filter(grepl(var, term))
    
}


steps_res = 
  map_dfr(.x = pa_vars,
          .f = fit_model,
          df = df_mortality_win) %>% 
  mutate(hr = exp(estimate * 500),
         ci_low = exp(500*(estimate - (1.96 * std.error))),
         ci_high = exp(500*(estimate + (1.96 * std.error))))

steps_res %>% 
  left_join(var_labels, by = c("term" = "names")) %>%
  mutate(labels = factor(labels),
         labels = fct_reorder(labels, hr, mean, .desc = TRUE),
         grp = "Raw") %>%
  ggplot(aes(color = term)) +
  geom_point(aes(y = labels, x = hr), size = 2)+
  geom_errorbarh(aes(xmin = ci_low, xmax = ci_high, y = labels), linewidth = 1.1)+
  theme_bw() + 
  scale_color_manual(values = c("#000000FF", "#265DABFF", "#DF5C24FF", "#059748FF", "#E5126FFF", "#9D722AFF", 
                                "#7B3A96FF", "#C7B42EFF", "#CB2027FF"),
                                labels = c("Actilife", "ADEPT", "Oak", "Verisense", "Verisense rev.",
                                           "Stepcount SSL", "Stepcount RF")) + 
  # scale_color_paletteer_d("ggthemes::few_Dark")+
  theme(legend.position = "none",
        strip.text = element_text(size = 14)) + 
    scale_x_continuous(limits=c(0.725, 1), breaks=seq(0.7, 1, .05))+
  facet_wrap(.~grp)+
  geom_vline(aes(xintercept =  1), linetype = "dashed") +
  labs(x = "Adjusted HR Associated with 500-step Increase in Mean Steps per Day", y = "")

Code
# ggsave(here::here("manuscript", "figures", "forest_raw.png"), width = 8, height = 8, dpi = 400)


steps_res_scaled = 
  map_dfr(.x = c(pa_vars, "total_PAXMTSM", "total_AC"),
          .f = fit_model,
          df = df_mortality_win_scaled) %>% 
  mutate(hr = exp(estimate),
         ci_low = exp(estimate - (1.96 * std.error)),
         ci_high = exp(estimate + (1.96 * std.error))) %>% 
  left_join(var_labels, by = c("term" = "names"))

steps_res_scaled %>% 
  filter(grepl("steps", term)) %>% 
  mutate(term2 = factor(term),
         term2 = fct_reorder(term2, hr, .desc = TRUE),
         labels = factor(labels),
         labels = fct_reorder(labels, hr, .desc = TRUE),
         grp = "Scaled") %>%
  ggplot(aes(color = term)) +
  geom_point(aes(y = labels, x = hr), size = 2)+
  geom_errorbarh(aes(xmin = ci_low, xmax = ci_high, y = labels), linewidth = 1.1)+
  theme_bw() + 
  facet_wrap(.~grp) + 
   scale_color_manual(values = c("#000000FF", "#265DABFF", "#DF5C24FF", "#059748FF", "#E5126FFF", "#9D722AFF", 
                                "#7B3A96FF", "#C7B42EFF", "#CB2027FF"),
                                labels = c("Actilife", "ADEPT", "Oak", "Verisense", "Verisense rev.",
                                           "Stepcount SSL", "Stepcount RF")) + 
  # scale_color_paletteer_d("ggthemes::few_Dark")+
  theme(legend.position = "none",
        strip.text = element_text(size = 14)) + 
  scale_x_continuous(limits=c(0.45, 1), breaks=seq(0.35, 1, .05))+
  geom_vline(aes(xintercept = 1), linetype = "dashed")+
  labs(x = "Adjusted HR Associated with One SD Increase in Mean Steps per Day", y = "")

Code
# ggsave(here::here("manuscript", "figures", "forest_scaled.png"), width = 8, height = 8, dpi = 400)

tb = steps_res %>% 
  select(term, hr, p.value, ci_low, ci_high) %>% 
  mutate(type = "raw") %>% 
  bind_rows(steps_res_scaled %>% select(term, hr, p.value, ci_low, ci_high) %>% mutate(type = "scaled")) %>% 
  pivot_wider(names_from = type, values_from = -term) %>% 
  filter(grepl("step", term)) %>% 
    arrange(desc(hr_scaled)) %>% 
    mutate(variable_fac = forcats::fct_reorder(term, hr_scaled, mean), 
           ci_raw = paste0(sprintf("%0.3f",round(hr_raw, 3)),
                       " (", sprintf("%0.3f",round(ci_low_raw, 3)), ", ", sprintf("%0.3f",round(ci_high_raw, 3)), ")"),
           ci_sc = paste0(sprintf("%0.3f",round(hr_scaled, 3)),
                       " (", sprintf("%0.3f",round(ci_low_scaled, 3)), ", ", sprintf("%0.3f",round(ci_high_scaled, 3)), ")")) %>%
    select(variable_fac, ci_sc, ci_raw) %>% 
    gt::gt() %>%
    cols_align(columns = everything(), align = "left") %>% 
    as.data.frame() %>% 
    kableExtra::kbl("latex", booktabs = TRUE)

Tables

Table 1: Demographics

Code
# add labels 

name_vec = colnames(df_accel)
labs = c("SEQN", "Data release cycle",
         "Interview Examination Status",
         "Sex", "Age (yrs)", "Age (mos)", "Race/ethnicity",
         "Six month time period",
         "Educ. level adults", "Marital status",
         "2 yr int weight", "2 yr exam weight", "Pseudo PSU",
         "Psueudo stratum", 
         "Annual HH income", 
         "Weight (kg)", "Height (cm)", "BMI (kg/m2)", 
         "Overweight", "Diabetes orig.", "Diabetes", "Arthritis", 
         "Coronary Heart Failure", "Congenital Heart Disease", "Angina", 
         "Heart attack", "Stroke", "Cancer", colnames(df_all)[29:33],
         "Alcohol use", "BMI Category", "Smoking status", "Mobility problem", "General health condition",
         "Eligbility", "Died by 5 years follow up", "COD", "COD Diabetes", "COD Hypertension", 
         "Person-months follow up from interview", "Person-months follow up from exam", "Eligibility category", "COD category",  "AC", "MIMS",
                     "Actilife steps", "ADEPT steps",
                    "log10 AC", "log10 MIMS", "Oak steps",
                     "Sc. RF steps", "Sc. SSL steps",
                    "Verisense rev. steps", "Verisense steps",
                    "Peak1 AC", "Peak1 MIMS",
                     "Peak1 Actilife steps", "Peak1 ADEPT steps",
                    "Peak1 log10 AC", "Peak1 log10 MIMS", "Peak1 Oak steps",
                     "Peak1 Sc. RF steps", "Peak1 Sc. SSL steps",
                    "Peak1 Verisense rev. steps", "Peak1 Verisense steps",
                     "Peak30 AC", "Peak30 MIMS",
                     "Peak30 Actilife steps", "Peak30 ADEPT steps",
                    "Peak30 log10 AC", "Peak30 log10 MIMS", "Peak30 Oak steps",
                     "Peak30 Sc. RF steps", "Peak30 Sc. SSL steps",
                    "Peak30 Verisense rev. steps", "Peak30 Verisense steps", "No. valid days", "Received accelerometer", 
         "Valid accelerometry", "Inclusion category", "Education level")



names(labs) = name_vec

df_all = 
  df_accel %>% 
  labelled::set_variable_labels(!!!labs)
# survey weighted table 
# question about this!! 
df_svy =
  df_all %>% 
  filter(has_accel) %>% 
  filter(valid_accel) %>% ### do we want to do this? 
  select(
    gender,
    age_in_years_at_screening,
    race_hispanic_origin,
    cat_education,
    cat_bmi,
    bin_diabetes,
    chf,
    chd,
    stroke,
    cat_alcohol,
    cat_smoke,
    bin_mobilityproblem,
    general_health_condition,
    mortstat,
    data_release_cycle,
    masked_variance_pseudo_psu, masked_variance_pseudo_stratum,
    full_sample_2_year_mec_exam_weight
  )  %>%
  mutate(WTMEC4YR = full_sample_2_year_mec_exam_weight/2,
         WTMEC4YR_norm = WTMEC4YR/mean(WTMEC4YR, na.rm = TRUE)) %>%
  select(-full_sample_2_year_mec_exam_weight) %>%
  svydesign(ids = ~masked_variance_pseudo_psu, weights = ~WTMEC4YR_norm, 
            strata = ~masked_variance_pseudo_stratum, data=., nest=TRUE)

df_svy %>%
  tbl_svysummary(
    by = data_release_cycle,
    include = -c(masked_variance_pseudo_psu, masked_variance_pseudo_stratum, WTMEC4YR,
                 WTMEC4YR_norm),
    statistic = list(
      all_continuous() ~ "{mean} ({sd})",
      all_categorical() ~ "{n} ({p}%)"
    ),
    digits = all_continuous() ~ 2,
    missing_text = "Missing",
  ) %>%
  add_overall() %>% 
  modify_caption("Demographic Characteristics, All Adults")
Demographic Characteristics, All Adults
Characteristic Overall, N = 8,6641 7, N = 4,3671 8, N = 4,2971
Sex
    Female 4,552 (53%) 2,281 (52%) 2,271 (53%)
    Male 4,112 (47%) 2,086 (48%) 2,027 (47%)
Age (yrs) 48.01 (17.41) 47.99 (17.29) 48.04 (17.53)
Race/ethnicity
    Non-Hispanic White 5,785 (67%) 2,918 (67%) 2,867 (67%)
    Non-Hispanic Black 979 (11%) 505 (12%) 475 (11%)
    Other Race - Including Multi-Rac 640 (7.4%) 312 (7.2%) 327 (7.6%)
    Mexican American 736 (8.5%) 344 (7.9%) 393 (9.1%)
    Other Hispanic 524 (6.0%) 288 (6.6%) 236 (5.5%)
Education level
    More than HS 5,279 (63%) 2,664 (63%) 2,615 (63%)
    Less than HS 1,330 (16%) 703 (17%) 627 (15%)
    HS/HS equivalent 1,788 (21%) 877 (21%) 911 (22%)
    Missing 267 123 144
BMI Category
    Normal 2,436 (28%) 1,258 (29%) 1,178 (28%)
    Obese 3,176 (37%) 1,538 (36%) 1,639 (38%)
    Overweight 2,825 (33%) 1,445 (33%) 1,380 (32%)
    Underweight 156 (1.8%) 84 (2.0%) 71 (1.7%)
    Missing 70 41 29
Diabetes 887 (10%) 430 (9.9%) 457 (11%)
    Missing 2 0 2
Coronary Heart Failure 247 (2.9%) 135 (3.2%) 113 (2.7%)
    Missing 270 127 143
Congenital Heart Disease 316 (3.8%) 145 (3.4%) 171 (4.1%)
    Missing 283 133 150
Stroke 263 (3.1%) 140 (3.3%) 124 (3.0%)
    Missing 267 124 143
Alcohol use
    Never drinker 1,057 (12%) 477 (11%) 581 (14%)
    Former drinker 1,233 (14%) 619 (14%) 614 (14%)
    Moderate drinker 2,657 (31%) 1,401 (32%) 1,256 (29%)
    Heavy drinker 669 (7.7%) 374 (8.6%) 296 (6.9%)
    Missing alcohol 3,048 (35%) 1,496 (34%) 1,552 (36%)
Smoking status
    Never smoker 4,820 (56%) 2,351 (55%) 2,470 (57%)
    Former smoker 2,087 (24%) 1,071 (25%) 1,016 (24%)
    Current smoker 1,630 (19%) 820 (19%) 810 (19%)
    Missing 127 126 1
Mobility problem 1,411 (17%) 638 (15%) 773 (19%)
    Missing 270 123 146
General health condition
    Poor 232 (2.7%) 106 (2.4%) 126 (2.9%)
    Fair 1,323 (15%) 612 (14%) 711 (17%)
    Good 3,408 (39%) 1,670 (38%) 1,738 (40%)
    Very good 2,742 (32%) 1,442 (33%) 1,300 (30%)
    Excellent 959 (11%) 536 (12%) 423 (9.8%)
Died by 5 years follow up 648 (7.5%) 354 (8.1%) 294 (6.8%)
    Missing 15 12 4
1 n (%); Mean (SD)
Code
df_svy =
  df_all %>%
  filter(has_accel) %>%
  filter(valid_accel) %>%
  mutate(
    mortstat_fac = factor(
      mortstat,
      levels = c(0, 1),
      labels = c("Alive", "Deceased")
    ),
    svy_year_fac = factor(
      data_release_cycle,
      levels = c(7, 8),
      labels = c("2011-2012", "2013-2014")
    ),
    WTMEC4YR = full_sample_2_year_mec_exam_weight / 2,
    WTMEC4YR_norm = WTMEC4YR / mean(WTMEC4YR)
  ) %>%
  svydesign(
    ids = ~ masked_variance_pseudo_psu,
    weights = ~ WTMEC4YR_norm,
    strata = ~ masked_variance_pseudo_stratum,
    data = .,
    nest = TRUE
  )
vars_table1 = c("age_years_interview","gender","race","BMI_cat","education_adult",
                 "overall_health_combined","diabetes","heart_attack","CHF","CHD","stroke","cancer",
                 "mobility_problem", "alcohol_consumption_fac","cigarette_smoking","TMIMS_mean","TAT_mean","TMVT_mean","ASTP_mean","RA_MIMS_mean",
                 "mortstat_fac")

vars_table1 = c( "gender",
   "age_in_years_at_screening",
    "race_hispanic_origin",
    "cat_education",
    "cat_bmi",
    "bin_diabetes",
    "chf",
    "chd",
    "stroke",
    "cat_alcohol",
    "cat_smoke",
    "bin_mobilityproblem",
    "general_health_condition",
    "mortstat_fac")
  
table_1 = tableone::svyCreateTableOne(vars=vars_table1, strata="svy_year_fac", test=TRUE, data=df_svy, addOverall = TRUE)

Table 2: Physical Activity

Option 1: by wave and age

Code
# with PA

df_adult = df_accel %>% 
  filter(age_in_years_at_screening >= 18) %>%
  mutate(weight = full_sample_2_year_mec_exam_weight / 2,
         weight_norm = weight / mean(weight)) %>%
  mutate(group = "All adults")

df_mort = df_mortality %>%
  mutate(weight = full_sample_2_year_mec_exam_weight / 2,
         weight_norm = weight / mean(weight)) %>%
  mutate(group = "50-79y.o") 


df = bind_rows(df_adult, df_mort) 

# df = 
#   df %>% 
#     labelled::set_variable_labels(!!!labs)

df = df %>% 
  rename(Verisense = "total_vssteps",
         "Verisense rev" = "total_vsrevsteps",
         Oak = "total_oaksteps",
         ADEPT = "total_adeptsteps",
         SCRF =  "total_scrfsteps",
         SCSSL =  "total_scsslsteps",
         MIMS = "total_PAXMTSM",
         Actilife = "total_actisteps",
         log10MIMS = "total_log10PAXMTSM",
         log10AC = "total_log10AC",
         AC = "total_AC") %>% 
  select(!contains("peak"))

df_analysis_svy = survey::svydesign(
  id = ~ masked_variance_pseudo_psu,
  strata = ~ masked_variance_pseudo_stratum,
  weights = ~ weight_norm,
  data = df,
  nest = TRUE
)


df_analysis_svy %>% 
  tbl_strata(
    strata = data_release_cycle,
    .tbl_fun =
      ~ .x %>%
      tbl_svysummary(.,
                     include = c(contains("Verisense"), contains("AC", ignore.case = F),
                                 contains("Oak"), contains("ADEPT"), contains("SCRF"), contains("SCSSL"), contains("MIMS"), contains("actilife")),
                     by = group,
                     statistic = list(
                       all_continuous() ~ "{mean} ({sd})",
                       all_categorical() ~ "{n} ({p}%)"
                     )),
    .header = "**Wave {strata}**, N = {n}"
  ) %>% 
  modify_caption("Physical Activity Mean Totals Stratified by Age and Wave")
Physical Activity Mean Totals Stratified by Age and Wave
Characteristic Wave 7, N = 6244 Wave 8, N = 6140
50-79y.o, N = 1,8771 All adults, N = 4,3671 50-79y.o, N = 1,8431 All adults, N = 4,2971
Verisense rev 8,025 (4,422) 9,102 (4,756) 7,553 (4,337) 8,725 (4,730)
Verisense 8,784 (3,885) 9,497 (4,062) 8,374 (3,922) 9,163 (4,065)
AC 2,418,849 (765,529) 2,573,262 (815,192) 2,364,933 (745,731) 2,549,846 (816,305)
log10AC 2,916 (358) 2,955 (366) 2,879 (377) 2,933 (370)
Oak 10,853 (4,837) 11,794 (5,061) 10,256 (4,746) 11,381 (5,065)
ADEPT 2,500 (1,564) 2,659 (1,569) 2,287 (1,687) 2,453 (1,575)
SCRF 10,529 (5,345) 11,509 (5,722) 10,068 (5,311) 11,263 (5,764)
SCSSL 8,814 (4,268) 9,144 (4,400) 8,423 (4,309) 8,846 (4,399)
log10MIMS 976 (150) 998 (154) 966 (154) 994 (155)
MIMS 12,823 (3,519) 13,572 (3,758) 12,578 (3,477) 13,467 (3,784)
Actilife 11,639 (3,851) 12,169 (3,995) 11,237 (3,907) 11,902 (4,048)
1 Mean (SD)
Code
df %>% 
  tbl_strata(
    strata = data_release_cycle,
    .tbl_fun =
      ~ .x %>%
      tbl_summary(.,
                     include = c(contains("Verisense"), contains("AC", ignore.case = F),
                                 contains("Oak"), contains("ADEPT"), contains("SCRF"), contains("SCSSL"), contains("MIMS"), contains("actilife")),
                     by = group,
                     statistic = list(
                       all_continuous() ~ "{mean} ({sd})",
                       all_categorical() ~ "{n} ({p}%)"
                     )),
    .header = "**Wave {strata}**, N = {n}"
  ) %>% 
  modify_caption("Physical Activity Mean Totals Stratified by Age and Wave - Unweighted")
Physical Activity Mean Totals Stratified by Age and Wave - Unweighted
Characteristic Wave 7, N = 6146 Wave 8, N = 6238
50-79y.o, N = 1,8431 All adults, N = 4,3031 50-79y.o, N = 1,8771 All adults, N = 4,3611
Verisense rev 7,748 (4,700) 8,895 (4,967) 7,550 (4,529) 8,740 (4,951)
Verisense 8,609 (4,214) 9,359 (4,291) 8,436 (4,117) 9,204 (4,258)
AC 2,392,776 (809,386) 2,547,733 (850,056) 2,383,231 (785,320) 2,554,633 (851,047)
log10AC 2,921 (389) 2,958 (384) 2,901 (386) 2,942 (380)
Oak 10,494 (5,156) 11,532 (5,314) 10,268 (4,986) 11,393 (5,316)
ADEPT 2,327 (1,623) 2,538 (1,626) 2,230 (1,636) 2,426 (1,583)
SCRF 10,121 (5,672) 11,220 (5,987) 10,081 (5,605) 11,264 (6,059)
SCSSL 8,459 (4,545) 8,893 (4,599) 8,377 (4,542) 8,807 (4,610)
log10MIMS 975 (162) 997 (162) 973 (159) 996 (160)
MIMS 12,712 (3,742) 13,467 (3,923) 12,677 (3,650) 13,497 (3,939)
Actilife 11,504 (4,182) 12,057 (4,216) 11,341 (4,075) 11,953 (4,216)
1 Mean (SD)

Supplement: peak values

Code
df_adult = df_accel %>% 
  mutate(weight = full_sample_2_year_mec_exam_weight / 2,
         weight_norm = weight / mean(weight)) %>%
  mutate(group = "All adults")

df_mort = df_mortality %>%
  mutate(weight = full_sample_2_year_mec_exam_weight / 2,
         weight_norm = weight / mean(weight)) %>%
  mutate(group = "50-79y.o") 


df = bind_rows(df_adult, df_mort) 

df_analysis_svy = survey::svydesign(
  id = ~ masked_variance_pseudo_psu,
  strata = ~ masked_variance_pseudo_stratum,
  weights = ~ weight_norm,
  data = df,
  nest = TRUE
)
df_analysis_svy %>% 
  tbl_strata(
    strata = gender,
    .tbl_fun =
      ~ .x %>%
      tbl_svysummary(.,
                     include = contains("peak1"),
                     by = group,
                     statistic = list(
                       all_continuous() ~ "{mean} ({sd})",
                       all_categorical() ~ "{n} ({p}%)"
                     )),
    .header = "**{strata}**, N = {n}"
  ) %>% 
  modify_caption("Physical Activity Peak 1 Min Values Stratified by Age and Sex")
Physical Activity Peak 1 Min Values Stratified by Age and Sex
Characteristic Female, N = 6552 Male, N = 5832
50-79y.o, N = 2,0001 All adults, N = 4,5521 50-79y.o, N = 1,7201 All adults, N = 4,1121
peak1_AC 12,348 (2,710) 13,379 (3,459) 12,056 (3,418) 13,383 (3,867)
peak1_actisteps 71 (18) 74 (18) 82 (18) 84 (17)
peak1_adeptsteps 63 (26) 66 (25) 68 (23) 70 (21)
peak1_log10AC 4.08 (0.08) 4.11 (0.10) 4.06 (0.11) 4.10 (0.11)
peak1_log10PAXMTSM 1.73 (0.08) 1.76 (0.10) 1.73 (0.10) 1.77 (0.12)
peak1_oaksteps 95 (18) 98 (18) 97 (15) 100 (14)
peak1_PAXMTSM 54 (11) 59 (16) 55 (16) 62 (20)
peak1_scrfsteps 102 (14) 104 (14) 101 (11) 104 (11)
peak1_scsslsteps 101 (17) 103 (17) 101 (15) 104 (15)
peak1_vsrevsteps 87 (20) 91 (20) 91 (18) 95 (17)
peak1_vssteps 82 (20) 84 (19) 86 (17) 88 (16)
1 Mean (SD)
Code
df_analysis_svy %>% 
  tbl_strata(
    strata = gender,
    .tbl_fun =
      ~ .x %>%
      tbl_svysummary(.,
                     include = contains("peak30"),
                     by = group,
                     statistic = list(
                       all_continuous() ~ "{mean} ({sd})",
                       all_categorical() ~ "{n} ({p}%)"
                     )),
    .header = "**{strata}**, N = {n}"
  ) %>% 
  modify_caption("Physical Activity Peak 30 Min Values Stratified by Age and Sex")
Physical Activity Peak 30 Min Values Stratified by Age and Sex
Characteristic Female, N = 6552 Male, N = 5832
50-79y.o, N = 2,0001 All adults, N = 4,5521 50-79y.o, N = 1,7201 All adults, N = 4,1121
peak30_AC 8,957 (1,693) 9,618 (2,216) 8,424 (2,111) 9,255 (2,381)
peak30_actisteps 47 (13) 49 (13) 56 (16) 58 (15)
peak30_adeptsteps 31 (19) 32 (18) 37 (18) 38 (17)
peak30_log10AC 3.94 (0.08) 3.96 (0.10) 3.91 (0.10) 3.94 (0.10)
peak30_log10PAXMTSM 1.60 (0.07) 1.63 (0.09) 1.58 (0.09) 1.62 (0.10)
peak30_oaksteps 66 (18) 69 (18) 71 (18) 74 (17)
peak30_PAXMTSM 40 (7) 43 (10) 39 (10) 43 (12)
peak30_scrfsteps 79 (18) 82 (18) 82 (16) 85 (15)
peak30_scsslsteps 76 (20) 79 (20) 80 (18) 84 (18)
peak30_vsrevsteps 58 (19) 62 (20) 64 (20) 68 (19)
peak30_vssteps 53 (17) 55 (17) 59 (18) 61 (16)
1 Mean (SD)

Single variable mortality prediction

Table 3 or supplement since duplicative of figure

Code
vlabs = c(
      "Age (yrs)",
      "Diabetes",
      "Mobility problem",
      "Cancer",
      "Alcohol use",
      "BMI Category",
      "Education category",
      "Smoking status",
      "Congenital Heart Disease",
      "Coronary Heart Failure",
      "Gender",
      "General health condition",
      "Heart attack",
      "Peak1 AC",
      "Peak1 Actilife steps",
      "Peak1 ADEPT steps",
      "Peak1 log10 AC",
      "Peak1 log10 MIMS",
      "Peak1 Oak steps",
      "Peak1 MIMS",
      "Peak1 Stepcount RF steps",
      "Peak1 Stepcount SSL steps",
      "Peak1 Verisense rev. steps",
      "Peak1 Verisense steps",
       "Peak30 AC",
      "Peak30 Actilife steps",
      "Peak30 ADEPT steps",
      "Peak30 log10 AC",
      "Peak30 log10 MIMS",
      "Peak30 Oak steps",
      "Peak30 MIMS",
      "Peak30 Stepcount RF steps",
      "Peak30 Stepcount SSL steps",
      "Peak30 Verisense rev. steps",
      "Peak30 Verisense steps",
      "Race/ethnicity",
      "Stroke",
      "AC",
      "Actilife steps",
      "ADEPT steps",
      "log10 AC",
      "log10 MIMS",
      "Oak steps",
      "MIMS",
      "Stepcount RF steps",
      "Stepcount SSL steps",
      "Verisense rev. steps",
      "Verisense steps"
    )
data_summary = wt_single %>%
  group_by(variable) %>%
  mutate(ind = row_number()) %>%
  summarize(across(concordance, list(
    mean = ~ mean(.x), se = ~ sd(.x) / sqrt(n())
  ))) %>%
  mutate(variable_fac = factor(
    variable,
    labels = vlabs
  )) %>% 
  filter(!grepl("peak", variable))

df_means =
  df_mortality_win %>%
  group_by(mortstat) %>%
  summarize(across(c(contains("total"), contains("peak")), ~ mean(.x))) %>%
  pivot_longer(cols = -mortstat) %>%
  pivot_wider(names_from = mortstat,
              values_from = value,
              names_prefix = 'died_') %>%
  rename(variable = name)

# get the best variable based on concodance
best_var = data_summary %>%
  arrange(desc(concordance_mean)) %>%
  slice(1) %>%
  pull(variable)

best_var_vec = wt_single %>%
  filter(variable == best_var) %>%
  pull(concordance)

t_tests = wt_single %>%
  group_by(variable) %>%
  filter(variable != best_var) %>%
  nest() %>%
  mutate(t_test = map(
    data,
    ~ t.test(
      .x$concordance,
      best_var_vec,
      var.eq = FALSE,
      paired = FALSE,
      alternative = "less"
    )
  ),
  res = map(t_test, tidy)) %>%
  unnest(res) %>%
  ungroup() %>%
  select(variable, p.value)

# join t tests with summary data and make table
data_summary %>%
  left_join(t_tests) %>%
  left_join(df_means) %>%
  arrange(desc(concordance_mean)) %>%
  mutate(
    variable_fac = forcats::fct_reorder(variable_fac, concordance_mean),
    lb = concordance_mean - 1.96 * concordance_se,
    ub = concordance_mean + 1.96 * concordance_se,
    ci = paste0(
      sprintf("%0.3f", round(concordance_mean, 3)),
      " (",
      sprintf("%0.3f", round(lb, 3)),
      ", ",
      sprintf("%0.3f", round(ub, 3)),
      ")"
    )
  ) %>%
  select(variable_fac,
         ci,
         p.value,
         total_alive = died_0,
         total_deceased = died_1) %>%
  mutate(pvalue = style_pvalue(p.value, digits = 3)) %>%
  mutate(pvalue = ifelse(is.na(pvalue), "---", pvalue),
         across(starts_with("mean"), ~ if_else(is.na(.x), "---", as.character(round(
           .x, 0
         ))))) %>%
  rename(
    Variable = variable_fac,
    "Mean (95% CI)" = ci,
    "p-value" = pvalue,
    "Mean value among alive" = total_alive,
    "Mean value among deceased" = total_deceased
  ) %>%
  gt::gt() %>%
  gt::fmt_number(columns = starts_with("mean"), decimals = 0) %>%
  gt::tab_style(
    style = list(cell_text(weight = "bold")),
    locations = cells_body(columns = everything(), rows = p.value > 0.05 |
                             is.na(p.value))
  ) %>%
  gt::cols_hide(p.value) %>%
  tab_header(title = "100x 10-fold Cross-Validated Single Variable Concordance", subtitle = "") %>%
  tab_footnote(footnote = "p-value for one sided t-test comparing variable to variable with higest mean concordance",
               locations = cells_column_labels(columns = "p-value")) %>%
  cols_align(columns = everything(), align = "left")
100x 10-fold Cross-Validated Single Variable Concordance
Variable Mean (95% CI) Mean value among alive Mean value among deceased p-value1
Stepcount RF steps 0.732 (0.728, 0.736) 10,591 6,502 ---
ADEPT steps 0.725 (0.720, 0.729) 2,391 1,380 0.007
Oak steps 0.723 (0.719, 0.727) 10,845 7,067 0.001
Verisense rev. steps 0.720 (0.716, 0.724) 8,021 4,878 <0.001
Verisense steps 0.716 (0.712, 0.720) 8,885 5,885 <0.001
Actilife steps 0.710 (0.706, 0.714) 11,790 8,740 <0.001
Stepcount SSL steps 0.702 (0.698, 0.706) 8,786 5,784 <0.001
AC 0.688 (0.684, 0.692) 2,454,243 1,900,197 <0.001
MIMS 0.682 (0.678, 0.686) 13,002 10,438 <0.001
Mobility problem 0.675 (0.672, 0.679) NA NA <0.001
Age (yrs) 0.673 (0.669, 0.677) NA NA <0.001
General health condition 0.662 (0.659, 0.666) NA NA <0.001
log10 MIMS 0.647 (0.643, 0.651) 987 884 <0.001
log10 AC 0.630 (0.626, 0.634) 2,939 2,708 <0.001
Diabetes 0.594 (0.590, 0.597) NA NA <0.001
Smoking status 0.589 (0.586, 0.592) NA NA <0.001
Education category 0.572 (0.569, 0.576) NA NA <0.001
Alcohol use 0.551 (0.547, 0.555) NA NA <0.001
Congenital Heart Disease 0.551 (0.549, 0.553) NA NA <0.001
Coronary Heart Failure 0.549 (0.547, 0.551) NA NA <0.001
Gender 0.547 (0.543, 0.550) NA NA <0.001
Heart attack 0.546 (0.544, 0.548) NA NA <0.001
Cancer 0.539 (0.536, 0.542) NA NA <0.001
Stroke 0.533 (0.531, 0.535) NA NA <0.001
BMI Category 0.524 (0.521, 0.528) NA NA <0.001
Race/ethnicity 0.524 (0.522, 0.526) NA NA <0.001
1 p-value for one sided t-test comparing variable to variable with higest mean concordance
Code
  tb = data_summary %>%
    left_join(t_tests) %>%
    left_join(df_means) %>% 
    arrange(desc(concordance_mean)) %>% 
    mutate(variable_fac = forcats::fct_reorder(variable_fac, concordance_mean), 
          lb = concordance_mean - 1.96 * concordance_se,
           ub = concordance_mean + 1.96 * concordance_se,
           ci = paste0(sprintf("%0.3f",round(concordance_mean, 3)),
                       " (", sprintf("%0.3f",round(lb, 3)), ", ", sprintf("%0.3f",round(ub, 3)), ")")) %>%
    select(variable_fac, ci, p.value, total_alive = died_0, total_deceased = died_1) %>%
    mutate(pvalue = style_pvalue(p.value, digits = 3)) %>%
    mutate(pvalue = ifelse(is.na(pvalue), "---", pvalue),
           across(starts_with("mean"), ~if_else(is.na(.x), "---", as.character(round(.x, 0))))) %>% 
    select(-p.value) %>% 
    rename(Variable = variable_fac,
           "Mean (95% CI)" = ci,
           "p-value" = pvalue,
           "Mean value among alive" = total_alive,
           "Mean value among deceased" = total_deceased) %>%
    gt::gt() %>%
    gt::fmt_number(columns = starts_with("mean"), decimals = 0) %>% 
    tab_header(title = "100x 10-fold Cross-Validated Single Variable Concordance") %>%
    tab_footnote(
      footnote = "p-value for one sided t-test comparing variable to variable with higest mean concordance",
      locations = cells_column_labels(columns = "p-value")
    ) %>%
    cols_align(columns = everything(), align = "left") %>% 
    as.data.frame() %>% 
    kableExtra::kbl("latex", booktabs = TRUE)
Code
data_summary = wt_single %>%
  group_by(variable) %>%
  mutate(ind = row_number()) %>%
  summarize(across(concordance, list(
    mean = ~ mean(.x), se = ~ sd(.x) / sqrt(n())
  ))) %>%
  mutate(variable_fac = factor(
    variable,
    labels = vlabs
  )) 

df_means =
  df_mortality_win %>%
  group_by(mortstat) %>%
  summarize(across(c(contains("total"), contains("peak")), ~ mean(.x))) %>%
  pivot_longer(cols = -mortstat) %>%
  pivot_wider(names_from = mortstat,
              values_from = value,
              names_prefix = 'died_') %>%
  rename(variable = name)

# get the best variable based on concodance
best_var = data_summary %>%
  arrange(desc(concordance_mean)) %>%
  slice(1) %>%
  pull(variable)

best_var_vec = wt_single %>%
  filter(variable == best_var) %>%
  pull(concordance)

t_tests = wt_single %>%
  group_by(variable) %>%
  filter(variable != best_var) %>%
  nest() %>%
  mutate(t_test = map(
    data,
    ~ t.test(
      .x$concordance,
      best_var_vec,
      var.eq = FALSE,
      paired = FALSE,
      alternative = "less"
    )
  ),
  res = map(t_test, tidy)) %>%
  unnest(res) %>%
  ungroup() %>%
  select(variable, p.value)

# join t tests with summary data and make table
data_summary %>%
  left_join(t_tests) %>%
  left_join(df_means) %>%
  arrange(desc(concordance_mean)) %>%
  mutate(
    variable_fac = forcats::fct_reorder(variable_fac, concordance_mean),
    lb = concordance_mean - 1.96 * concordance_se,
    ub = concordance_mean + 1.96 * concordance_se,
    ci = paste0(
      sprintf("%0.3f", round(concordance_mean, 3)),
      " (",
      sprintf("%0.3f", round(lb, 3)),
      ", ",
      sprintf("%0.3f", round(ub, 3)),
      ")"
    )
  ) %>%
  select(variable_fac,
         ci,
         p.value,
         total_alive = died_0,
         total_deceased = died_1) %>%
  mutate(pvalue = style_pvalue(p.value, digits = 3)) %>%
  mutate(pvalue = ifelse(is.na(pvalue), "---", pvalue),
         across(starts_with("mean"), ~ if_else(is.na(.x), "---", as.character(round(
           .x, 0
         ))))) %>%
  rename(
    Variable = variable_fac,
    "Mean (95% CI)" = ci,
    "p-value" = pvalue,
    "Mean value among alive" = total_alive,
    "Mean value among deceased" = total_deceased
  ) %>%
  gt::gt() %>%
  gt::fmt_number(columns = starts_with("mean"), decimals = 0) %>%
  gt::tab_style(
    style = list(cell_text(weight = "bold")),
    locations = cells_body(columns = everything(), rows = p.value > 0.05 |
                             is.na(p.value))
  ) %>%
  gt::cols_hide(p.value) %>%
  tab_header(title = "100x 10-fold Cross-Validated Single Variable Concordance", subtitle = "") %>%
  tab_footnote(footnote = "p-value for one sided t-test comparing variable to variable with higest mean concordance",
               locations = cells_column_labels(columns = "p-value")) %>%
  cols_align(columns = everything(), align = "left")
100x 10-fold Cross-Validated Single Variable Concordance
Variable Mean (95% CI) Mean value among alive Mean value among deceased p-value1
Peak30 AC 0.733 (0.730, 0.736) 8,725 7,249 ---
Peak30 Stepcount RF steps 0.732 (0.728, 0.736) 81 64 0.348
Peak30 MIMS 0.732 (0.729, 0.735) 40 33 0.328
Stepcount RF steps 0.732 (0.728, 0.736) 10,591 6,502 0.334
Peak30 log10 AC 0.731 (0.728, 0.735) 4 4 0.235
Peak30 log10 MIMS 0.731 (0.728, 0.734) 2 2 0.168
Peak1 log10 AC 0.727 (0.724, 0.731) 4 4 0.006
Peak1 AC 0.727 (0.724, 0.730) 12,292 10,235 0.004
Peak1 Verisense rev. steps 0.727 (0.723, 0.730) 90 72 0.004
Peak1 Stepcount RF steps 0.726 (0.722, 0.730) 102 90 0.002
Peak30 Verisense rev. steps 0.726 (0.722, 0.729) 62 45 0.001
ADEPT steps 0.725 (0.720, 0.729) 2,391 1,380 <0.001
Oak steps 0.723 (0.719, 0.727) 10,845 7,067 <0.001
Peak1 log10 MIMS 0.723 (0.720, 0.726) 2 2 <0.001
Peak1 MIMS 0.722 (0.719, 0.725) 54 45 <0.001
Peak30 Oak steps 0.721 (0.717, 0.725) 69 53 <0.001
Verisense rev. steps 0.720 (0.716, 0.724) 8,021 4,878 <0.001
Verisense steps 0.716 (0.712, 0.720) 8,885 5,885 <0.001
Peak1 Stepcount SSL steps 0.715 (0.711, 0.718) 102 87 <0.001
Peak30 Stepcount SSL steps 0.714 (0.710, 0.718) 79 61 <0.001
Peak30 Actilife steps 0.714 (0.710, 0.718) 52 41 <0.001
Peak30 Verisense steps 0.713 (0.709, 0.717) 57 43 <0.001
Peak30 ADEPT steps 0.713 (0.709, 0.717) 34 22 <0.001
Actilife steps 0.710 (0.706, 0.714) 11,790 8,740 <0.001
Peak1 Oak steps 0.710 (0.706, 0.714) 96 81 <0.001
Peak1 Verisense steps 0.704 (0.700, 0.708) 85 68 <0.001
Peak1 Actilife steps 0.703 (0.699, 0.707) 77 62 <0.001
Stepcount SSL steps 0.702 (0.698, 0.706) 8,786 5,784 <0.001
Peak1 ADEPT steps 0.693 (0.689, 0.697) 66 47 <0.001
AC 0.688 (0.684, 0.692) 2,454,243 1,900,197 <0.001
MIMS 0.682 (0.678, 0.686) 13,002 10,438 <0.001
Mobility problem 0.675 (0.672, 0.679) NA NA <0.001
Age (yrs) 0.673 (0.669, 0.677) NA NA <0.001
General health condition 0.662 (0.659, 0.666) NA NA <0.001
log10 MIMS 0.647 (0.643, 0.651) 987 884 <0.001
log10 AC 0.630 (0.626, 0.634) 2,939 2,708 <0.001
Diabetes 0.594 (0.590, 0.597) NA NA <0.001
Smoking status 0.589 (0.586, 0.592) NA NA <0.001
Education category 0.572 (0.569, 0.576) NA NA <0.001
Alcohol use 0.551 (0.547, 0.555) NA NA <0.001
Congenital Heart Disease 0.551 (0.549, 0.553) NA NA <0.001
Coronary Heart Failure 0.549 (0.547, 0.551) NA NA <0.001
Gender 0.547 (0.543, 0.550) NA NA <0.001
Heart attack 0.546 (0.544, 0.548) NA NA <0.001
Cancer 0.539 (0.536, 0.542) NA NA <0.001
Stroke 0.533 (0.531, 0.535) NA NA <0.001
BMI Category 0.524 (0.521, 0.528) NA NA <0.001
Race/ethnicity 0.524 (0.522, 0.526) NA NA <0.001
1 p-value for one sided t-test comparing variable to variable with higest mean concordance

Multivariable mortality prediction - concordance and model summaries

Still trying to figure out how to present this

Code
wt_small = 
  wt_all %>% 
  filter(model %in% c("Demo only", "scrfsteps+PAXMTSM", "scrfsteps", "PAXMTSM"))

summary = 
  wt_small %>% 
  group_by(model) %>% 
  summarize(across(concordance, list(mean = ~mean(.x),
                                       se = ~sd(.x)/sqrt(n())))) %>% 
  mutate(model_fac = factor(model, labels = c("Demographics only",
                                              "Demographics + MIMS", 
                                              "Demographics + SCRF steps", 
                                              "Demographics + SCRF steps + MIMS")))

coef_scrf = 
  df_mortality_win %>%  
  mutate(
     weight = full_sample_2_year_mec_exam_weight / 2,
                weight_norm = weight / mean(weight)) %>% 
  coxph(Surv(event_time, mortstat) ~ total_scrfsteps + 
          age_in_years_at_screening + cat_education + bin_diabetes + cat_bmi + 
          race_hispanic_origin + chf + general_health_condition + chd + 
          heartattack + cancer + stroke + cat_alcohol + cat_smoke + 
          bin_mobilityproblem, weights = weight_norm, data  = .) %>% 
  tidy() %>% 
  select(term, estimate, std.error, robust.se, p.value) %>% 
  filter(grepl("steps", term)) %>% 
  mutate(hr = exp(estimate * 500), 
         lb = exp((estimate - 1.96 * std.error) * 500),
         ub = exp((estimate + 1.96 * std.error) * 500),
         model = "scrfsteps")
  
coef_mims = 
  df_mortality_win %>% 
  mutate(
     weight = full_sample_2_year_mec_exam_weight / 2,
                weight_norm = weight / mean(weight)) %>% 
  coxph(Surv(event_time, mortstat) ~ total_PAXMTSM + total_scrfsteps + 
          age_in_years_at_screening + cat_education + bin_diabetes + cat_bmi + 
          race_hispanic_origin + chf + general_health_condition + chd + 
          heartattack + cancer + stroke + cat_alcohol + cat_smoke + 
          bin_mobilityproblem, weights = weight_norm, data  = .) %>% 
  tidy() %>% 
  select(term, estimate, std.error, robust.se, p.value) %>% 
  filter(grepl("steps", term)) %>% 
  mutate(hr = exp(estimate * 500), 
         lb = exp((estimate - 1.96 * std.error) * 500),
         ub = exp((estimate + 1.96 * std.error) * 500),
         model = "scrfsteps+PAXMTSM")
  
coefs = bind_rows(coef_scrf, coef_mims)
summary %>% 
  left_join(coefs)  %>% 
  mutate(conc_lb = concordance_mean - 1.96 * concordance_se,
         conc_ub = concordance_mean + 1.96 * concordance_se,
         conc_ci = paste0(sprintf("%0.3f",round(concordance_mean, 3)),
                       " (", sprintf("%0.3f",round(conc_lb, 3)), ", ", sprintf("%0.3f",round(conc_ub, 3)), ")"),
         hr_ci = case_when(!is.na(hr) ~ paste0(sprintf("%0.3f",round(hr, 3)),
                       " (", sprintf("%0.3f",round(lb, 3)), ", ", sprintf("%0.3f",round(ub, 3)), ")"),
                       TRUE ~ "---"),
         p.value = format.pval(p.value, digits = 3)) %>% 
  select(model_fac, hr_ci,p.value, conc_ci) %>% 
  rename(Model = model_fac,
         "Steps HR" = hr_ci,
         "Steps p-value" = p.value,
         "Model concordance" = conc_ci) %>% 
  gt::gt() %>%
    # gt::fmt_number(columns = starts_with("mean"), decimals = 0) %>% 
    tab_header(title = "Multivariable Model Comparison") %>%
    cols_align(columns = everything(), align = "left")
Multivariable Model Comparison
Model Steps HR Steps p-value Model concordance
Demographics only --- NA 0.769 (0.766, 0.773)
Demographics + MIMS --- NA 0.773 (0.770, 0.777)
Demographics + SCRF steps 0.955 (0.940, 0.970) 5.85e-05 0.776 (0.772, 0.779)
Demographics + SCRF steps + MIMS 0.961 (0.939, 0.983) 0.0361 0.774 (0.770, 0.777)
Code
tb = 
  summary %>% 
  left_join(coefs)  %>% 
  mutate(conc_lb = concordance_mean - 1.96 * concordance_se,
         conc_ub = concordance_mean + 1.96 * concordance_se,
         conc_ci = paste0(sprintf("%0.3f",round(concordance_mean, 3)),
                       " (", sprintf("%0.3f",round(conc_lb, 3)), ", ", sprintf("%0.3f",round(conc_ub, 3)), ")"),
         hr_ci = case_when(!is.na(hr) ~ paste0(sprintf("%0.3f",round(hr, 3)),
                       " (", sprintf("%0.3f",round(lb, 3)), ", ", sprintf("%0.3f",round(ub, 3)), ")"),
                       TRUE ~ "---"),
         p.value = format.pval(p.value, digits = 3)) %>% 
  select(model_fac, hr_ci,p.value, conc_ci) %>% 
  rename(Model = model_fac,
         "Steps HR" = hr_ci,
         "Steps p-value" = p.value,
         "Model concordance" = conc_ci) %>% 
  gt::gt() %>%
    # gt::fmt_number(columns = starts_with("mean"), decimals = 0) %>% 
    tab_header(title = "Multivariable Model Comparison",
               subtitle = "Male") %>%
    cols_align(columns = everything(), align = "left") %>% 
  data.frame() %>%
  kableExtra::kbl(format = "latex")
Code
model_steps_vs = 
  df_mortality_win %>% 
  filter(gender == "Male") %>% 
  mutate(
     weight = full_sample_2_year_mec_exam_weight / 2,
                weight_norm = weight / mean(weight)) %>% 
  coxph(Surv(event_time, mortstat) ~ total_vsrevsteps + 
          age_in_years_at_screening + cat_education + bin_diabetes + cat_bmi + 
          race_hispanic_origin + chf + general_health_condition + chd + 
          heartattack + cancer + stroke + cat_alcohol + cat_smoke + 
          bin_mobilityproblem, weights = weight_norm, data  = .)

model_steps_vs %>% 
  tidy() %>% 
  select(term, estimate, robust.se, p.value) %>% 
  arrange(p.value) %>% 
  mutate(sig = case_when(
           p.value < 0.001 ~ "***",
           p.value < 0.01 ~ "**", 
           p.value < 0.05 ~ "*",
           p.value < 0.1 ~ ".",
           TRUE ~ ""),
         p.value = style_pvalue(p.value, digits = 2)) %>% 
  
  gt() %>% 
  tab_style(
    style = list(
      cell_fill(color = "lightcyan"),
      cell_text(weight = "bold")
      ),
    locations = cells_body(
      columns = everything(),
      rows = grepl("mean", term)
    )
  ) %>% 
  fmt_number(columns = c(estimate, robust.se), n_sigfig = 2) 

model_steps_mims = 
  df_mortality_win %>% 
  filter(gender == "Male") %>% 
  mutate(
     weight = full_sample_2_year_mec_exam_weight / 2,
                weight_norm = weight / mean(weight)) %>% 
  coxph(Surv(event_time, mortstat) ~ total_PAXMTSM + 
          age_in_years_at_screening + cat_education + bin_diabetes + cat_bmi + 
          race_hispanic_origin + chf + general_health_condition + chd + 
          heartattack + cancer + stroke + cat_alcohol + cat_smoke + 
          bin_mobilityproblem, weights = weight_norm, data  = .) 

model_steps_mims %>% 
  tidy() %>% 
  select(term, estimate, robust.se, p.value) %>% 
  arrange(p.value) %>% 
  mutate(sig = case_when(
           p.value < 0.001 ~ "***",
           p.value < 0.01 ~ "**", 
           p.value < 0.05 ~ "*",
           p.value < 0.1 ~ ".",
           TRUE ~ ""),
         p.value = style_pvalue(p.value, digits = 2)) %>% 
  gt() %>% 
  tab_style(
    style = list(
      cell_fill(color = "lightcyan"),
      cell_text(weight = "bold")
      ),
    locations = cells_body(
      columns = everything(),
      rows = grepl("mean", term)
    )
  ) %>% 
  fmt_number(columns = c(estimate, robust.se), n_sigfig = 2) 

model_steps_vs_mims = 
  df_mortality_win %>% 
  filter(gender == "Male") %>% 
  mutate(
     weight = full_sample_2_year_mec_exam_weight / 2,
                weight_norm = weight / mean(weight)) %>% 
  coxph(Surv(event_time, mortstat) ~ total_vsrevsteps + total_PAXMTSM + 
          age_in_years_at_screening + cat_education + bin_diabetes + cat_bmi + 
          race_hispanic_origin + chf + general_health_condition + chd + 
          heartattack + cancer + stroke + cat_alcohol + cat_smoke + 
          bin_mobilityproblem, weights = weight_norm, data  = .) 

model_steps_vs_mims %>% 
  tidy() %>% 
  select(term, estimate, robust.se, p.value) %>% 
  arrange(p.value) %>% 
  mutate(sig = case_when(
           p.value < 0.001 ~ "***",
           p.value < 0.01 ~ "**", 
           p.value < 0.05 ~ "*",
           p.value < 0.1 ~ ".",
           TRUE ~ ""),
         p.value = style_pvalue(p.value, digits = 2)) %>% 
  gt() %>% 
  tab_style(
    style = list(
      cell_fill(color = "lightcyan"),
      cell_text(weight = "bold")
      ),
    locations = cells_body(
      columns = everything(),
      rows = grepl("mean", term)
    )
  ) %>% 
  fmt_number(columns = c(estimate, robust.se), n_sigfig = 2) 

model_steps_vs_ac = 
  df_mortality_win %>% 
  filter(gender == "Male") %>% 
  mutate(
     weight = full_sample_2_year_mec_exam_weight / 2,
                weight_norm = weight / mean(weight)) %>% 
  coxph(Surv(event_time, mortstat) ~ total_vsrevsteps + total_AC + 
          age_in_years_at_screening + cat_education + bin_diabetes + cat_bmi + 
          race_hispanic_origin + chf + general_health_condition + chd + 
          heartattack + cancer + stroke + cat_alcohol + cat_smoke + 
          bin_mobilityproblem, weights = weight_norm, data  = .) 

model_steps_vs_ac %>% 
  tidy() %>% 
  select(term, estimate, robust.se, p.value) %>% 
  arrange(p.value) %>% 
  mutate(sig = case_when(
           p.value < 0.001 ~ "***",
           p.value < 0.01 ~ "**", 
           p.value < 0.05 ~ "*",
           p.value < 0.1 ~ ".",
           TRUE ~ ""),
         p.value = style_pvalue(p.value, digits = 2)) %>% 
  gt() %>% 
  tab_style(
    style = list(
      cell_fill(color = "lightcyan"),
      cell_text(weight = "bold")
      ),
    locations = cells_body(
      columns = everything(),
      rows = grepl("mean", term)
    )
  ) %>% 
  fmt_number(columns = c(estimate, robust.se), n_sigfig = 2) 
Code
model_steps_sc = 
  df_mortality_win %>% 
  filter(gender == "Female") %>% 
  mutate(
     weight = full_sample_2_year_mec_exam_weight / 2,
                weight_norm = weight / mean(weight)) %>% 
  coxph(Surv(event_time, mortstat) ~ total_scrfsteps + 
          age_in_years_at_screening + cat_education + bin_diabetes + cat_bmi + 
          race_hispanic_origin + chf + general_health_condition + chd + 
          heartattack + cancer + stroke + cat_alcohol + cat_smoke + 
          bin_mobilityproblem, weights = weight_norm, data  = .)

model_steps_sc %>% 
  tidy() %>% 
  select(term, estimate, robust.se, p.value) %>% 
  arrange(p.value) %>% 
  mutate(sig = case_when(
           p.value < 0.001 ~ "***",
           p.value < 0.01 ~ "**", 
           p.value < 0.05 ~ "*",
           p.value < 0.1 ~ ".",
           TRUE ~ ""),
         p.value = style_pvalue(p.value, digits = 2)) %>% 
  
  gt() %>% 
  tab_style(
    style = list(
      cell_fill(color = "lightcyan"),
      cell_text(weight = "bold")
      ),
    locations = cells_body(
      columns = everything(),
      rows = grepl("mean", term)
    )
  ) %>% 
  fmt_number(columns = c(estimate, robust.se), n_sigfig = 2) 

model_steps_mims = 
  df_mortality_win %>% 
  filter(gender == "Female") %>% 
  mutate(
     weight = full_sample_2_year_mec_exam_weight / 2,
                weight_norm = weight / mean(weight)) %>% 
  coxph(Surv(event_time, mortstat) ~ total_PAXMTSM + 
          age_in_years_at_screening + cat_education + bin_diabetes + cat_bmi + 
          race_hispanic_origin + chf + general_health_condition + chd + 
          heartattack + cancer + stroke + cat_alcohol + cat_smoke + 
          bin_mobilityproblem, weights = weight_norm, data  = .) 

model_steps_mims %>% 
  tidy() %>% 
  select(term, estimate, robust.se, p.value) %>% 
  arrange(p.value) %>% 
  mutate(sig = case_when(
           p.value < 0.001 ~ "***",
           p.value < 0.01 ~ "**", 
           p.value < 0.05 ~ "*",
           p.value < 0.1 ~ ".",
           TRUE ~ ""),
         p.value = style_pvalue(p.value, digits = 2)) %>% 
  gt() %>% 
  tab_style(
    style = list(
      cell_fill(color = "lightcyan"),
      cell_text(weight = "bold")
      ),
    locations = cells_body(
      columns = everything(),
      rows = grepl("mean", term)
    )
  ) %>% 
  fmt_number(columns = c(estimate, robust.se), n_sigfig = 2) 

model_steps_sc_mims = 
  df_mortality_win %>% 
  filter(gender == "Female") %>% 
  mutate(
     weight = full_sample_2_year_mec_exam_weight / 2,
                weight_norm = weight / mean(weight)) %>% 
  coxph(Surv(event_time, mortstat) ~ total_scrfsteps + total_PAXMTSM + 
          age_in_years_at_screening + cat_education + bin_diabetes + cat_bmi + 
          race_hispanic_origin + chf + general_health_condition + chd + 
          heartattack + cancer + stroke + cat_alcohol + cat_smoke + 
          bin_mobilityproblem, weights = weight_norm, data  = .) 

model_steps_sc_mims %>% 
  tidy() %>% 
  select(term, estimate, robust.se, p.value) %>% 
  arrange(p.value) %>% 
  mutate(sig = case_when(
           p.value < 0.001 ~ "***",
           p.value < 0.01 ~ "**", 
           p.value < 0.05 ~ "*",
           p.value < 0.1 ~ ".",
           TRUE ~ ""),
         p.value = style_pvalue(p.value, digits = 2)) %>% 
  gt() %>% 
  tab_style(
    style = list(
      cell_fill(color = "lightcyan"),
      cell_text(weight = "bold")
      ),
    locations = cells_body(
      columns = everything(),
      rows = grepl("mean", term)
    )
  ) %>% 
  fmt_number(columns = c(estimate, robust.se), n_sigfig = 2) 

model_steps_sc_ac = 
  df_mortality_win %>% 
  filter(gender == "Female") %>% 
  mutate(
     weight = full_sample_2_year_mec_exam_weight / 2,
                weight_norm = weight / mean(weight)) %>% 
  coxph(Surv(event_time, mortstat) ~ total_scrfsteps + total_AC + 
          age_in_years_at_screening + cat_education + bin_diabetes + cat_bmi + 
          race_hispanic_origin + chf + general_health_condition + chd + 
          heartattack + cancer + stroke + cat_alcohol + cat_smoke + 
          bin_mobilityproblem, weights = weight_norm, data  = .) 

model_steps_sc_ac %>% 
  tidy() %>% 
  select(term, estimate, robust.se, p.value) %>% 
  arrange(p.value) %>% 
  mutate(sig = case_when(
           p.value < 0.001 ~ "***",
           p.value < 0.01 ~ "**", 
           p.value < 0.05 ~ "*",
           p.value < 0.1 ~ ".",
           TRUE ~ ""),
         p.value = style_pvalue(p.value, digits = 2)) %>% 
  gt() %>% 
  tab_style(
    style = list(
      cell_fill(color = "lightcyan"),
      cell_text(weight = "bold")
      ),
    locations = cells_body(
      columns = everything(),
      rows = grepl("mean", term)
    )
  ) %>% 
  fmt_number(columns = c(estimate, robust.se), n_sigfig = 2) 

With step cadence

Code
wt_female_cad = readRDS(here::here("results", "metrics_wtd_100_female_cadence.rds"))


wt_female_small = 
  wt_female_cad %>% 
  filter(model %in% c("scrfsteps", "scrfsteps+peak1_scrfsteps",
                      "peak1_scrfsteps"))

summary = 
  wt_female_small %>% 
  group_by(model) %>% 
  summarize(across(concordance, list(mean = ~mean(.x),
                                       se = ~sd(.x)/sqrt(n())))) %>% 
  mutate(model_fac = factor(model, labels = c("Demographics + Peak1 Cadence", 
                                                "Demographics + Total Steps", 
                                                "Demographics + Steps + Peak1 Cadence")))

coef_steps = 
  df_mortality_win %>% 
  filter(gender == "Female") %>% 
  mutate(
     weight = full_sample_2_year_mec_exam_weight / 2,
                weight_norm = weight / mean(weight)) %>% 
  coxph(Surv(event_time, mortstat) ~ total_scrfsteps + 
          age_in_years_at_screening + cat_education + bin_diabetes + cat_bmi + 
          race_hispanic_origin + chf + general_health_condition + chd + 
          heartattack + cancer + stroke + cat_alcohol + cat_smoke + 
          bin_mobilityproblem, weights = weight_norm, data  = .) %>% 
  tidy() %>% 
  select(term, estimate, std.error, robust.se, p.value) %>% 
  filter(grepl("steps", term)) %>% 
  mutate(hr = exp(estimate * 500), 
         lb = exp((estimate - 1.96 * std.error) * 500),
         ub = exp((estimate + 1.96 * std.error) * 500),
         step_ci = paste0(sprintf("%0.3f",round(hr, 3)),
                       " (", sprintf("%0.3f",round(lb, 3)), ", ", sprintf("%0.3f",round(ub, 3)), ")"), model = "scrfsteps") %>% 
  rename(step_p = p.value) %>% 
  select(model, step_ci, step_p)

  
coef_cad = 
  df_mortality_win %>% 
  filter(gender == "Female") %>% 
  mutate(
     weight = full_sample_2_year_mec_exam_weight / 2,
                weight_norm = weight / mean(weight)) %>% 
  coxph(Surv(event_time, mortstat) ~ total_scrfsteps + 
          age_in_years_at_screening + cat_education + bin_diabetes + cat_bmi + 
          race_hispanic_origin + chf + general_health_condition + chd + 
          heartattack + cancer + stroke + cat_alcohol + cat_smoke + 
          bin_mobilityproblem, weights = weight_norm, data  = .) %>% 
  tidy() %>% 
  select(term, estimate, std.error, robust.se, p.value) %>% 
  filter(grepl("steps", term)) %>% 
  mutate(hr = exp(estimate *10), 
         lb = exp((estimate - 1.96 * std.error)*10 ),
         ub = exp((estimate + 1.96 * std.error)*10),
         cad_ci = paste0(sprintf("%0.3f",round(hr, 3)),
                       " (", sprintf("%0.3f",round(lb, 3)), ", ", sprintf("%0.3f",round(ub, 3)), ")"), model = "peak1_scrfsteps") %>% 
  rename(cad_p = p.value) %>% 
  select(model, cad_ci, cad_p)


coef_both = 
  df_mortality_win %>% 
  filter(gender == "Female") %>% 
  mutate(
     weight = full_sample_2_year_mec_exam_weight / 2,
                weight_norm = weight / mean(weight)) %>% 
  coxph(Surv(event_time, mortstat) ~ peak1_scrfsteps + total_scrfsteps +
          age_in_years_at_screening + cat_education + bin_diabetes + cat_bmi + 
          race_hispanic_origin + chf + general_health_condition + chd + 
          heartattack + cancer + stroke + cat_alcohol + cat_smoke + 
          bin_mobilityproblem, weights = weight_norm, data  = .) %>% 
  tidy() %>% 
  select(term, estimate, std.error, robust.se, p.value) %>% 
  filter(grepl("steps", term))
  
coef_cad_steps = 
  coef_both %>% 
  filter(grepl("peak", term)) %>% 
  mutate(hr = exp(estimate*10), 
         lb = exp((estimate - 1.96 * std.error)*10 ),
         ub = exp((estimate + 1.96 * std.error)*10),
         cad_ci = paste0(sprintf("%0.3f",round(hr, 3)),
                       " (", sprintf("%0.3f",round(lb, 3)), ", ", sprintf("%0.3f",round(ub, 3)), ")"), model = "scrfsteps+peak1_scrfsteps") %>% 
  rename(cad_p = p.value)

coef_cad_steps2 = 
  coef_both %>% 
  filter(!grepl("peak", term)) %>% 
  mutate(hr = exp(estimate *500), 
         lb = exp((estimate - 1.96 * std.error) *500),
         ub = exp((estimate + 1.96 * std.error)*500 ),
         step_ci = paste0(sprintf("%0.3f",round(hr, 3)),
                       " (", sprintf("%0.3f",round(lb, 3)), ", ", sprintf("%0.3f",round(ub, 3)), ")"), model = "scrfsteps+peak1_scrfsteps") %>% 
  rename(step_p = p.value)

coef2 = coef_cad_steps %>% 
  left_join(coef_cad_steps2, by = "model") %>% 
  select(model, cad_ci, cad_p, step_ci, step_p) %>% 
  bind_rows(coef_steps) %>% 
  bind_rows(coef_cad)


summary %>% 
  left_join(coef2)  %>% 
  mutate(conc_lb = concordance_mean - 1.96 * concordance_se,
         conc_ub = concordance_mean + 1.96 * concordance_se,
         conc_ci = paste0(sprintf("%0.3f",round(concordance_mean, 3)),
                       " (", sprintf("%0.3f",round(conc_lb, 3)), ", ", sprintf("%0.3f",round(conc_ub, 3)), ")"),
         across(contains("_p"), ~format.pval(.x, digits = 3))) %>% 
  select(model_fac, step_ci, step_p, cad_ci, cad_p, conc_ci)   %>% 
  rename(Model = model_fac,
         "Steps HR" = step_ci,
         "Steps p-value" = step_p,
         "Cadence HR" = cad_ci,
         "Cadence p-value" = cad_p,
         "Model concordance" = conc_ci) %>% 
  gt::gt() %>%
    # gt::fmt_number(columns = starts_with("mean"), decimals = 0) %>% 
    tab_header(title = "Comparison of Addition of Cadence Variables",
               subtitle = "Female") %>%
    cols_align(columns = everything(), align = "left")
Code
wt_male_cad = readRDS(here::here("results", "metrics_wtd_100_male_cadence.rds"))
make_multivar_tab(wt_male_cad, subt = "Male")

model_steps_sc_cad = 
  df_mortality_win %>% 
  filter(gender == "Male") %>% 
  mutate(
     weight = full_sample_2_year_mec_exam_weight / 2,
                weight_norm = weight / mean(weight)) %>% 
  coxph(Surv(event_time, mortstat) ~ total_vsrevsteps + peak1_vsrevsteps + 
          age_in_years_at_screening + cat_education + bin_diabetes + cat_bmi + 
          race_hispanic_origin + chf + general_health_condition + chd + 
          heartattack + cancer + stroke + cat_alcohol + cat_smoke + 
          bin_mobilityproblem, weights = weight_norm, data  = .) 

model_steps_sc_cad %>% 
  tidy() %>% 
  select(term, estimate, robust.se, p.value) %>% 
  arrange(p.value) %>% 
  mutate(sig = case_when(
           p.value < 0.001 ~ "***",
           p.value < 0.01 ~ "**", 
           p.value < 0.05 ~ "*",
           p.value < 0.1 ~ ".",
           TRUE ~ ""),
         p.value = style_pvalue(p.value, digits = 2)) %>% 
  gt() %>% 
  tab_style(
    style = list(
      cell_fill(color = "lightcyan"),
      cell_text(weight = "bold")
      ),
    locations = cells_body(
      columns = everything(),
      rows = grepl("revised", term)
    )
  ) %>% 
  fmt_number(columns = c(estimate, robust.se), n_sigfig = 2) 

model_steps_sc_cad = 
  df_mortality_win %>% 
  filter(gender == "Male") %>% 
  mutate(
     weight = full_sample_2_year_mec_exam_weight / 2,
                weight_norm = weight / mean(weight)) %>% 
  coxph(Surv(event_time, mortstat) ~ total_vsrevsteps + peak30_vsrevsteps +
          age_in_years_at_screening + cat_education + bin_diabetes + cat_bmi + 
          race_hispanic_origin + chf + general_health_condition + chd + 
          heartattack + cancer + stroke + cat_alcohol + cat_smoke + 
          bin_mobilityproblem, weights = weight_norm, data  = .) 


model_steps_sc_cad %>% 
  tidy() %>% 
  select(term, estimate, robust.se, p.value) %>% 
  arrange(p.value) %>% 
  mutate(sig = case_when(
           p.value < 0.001 ~ "***",
           p.value < 0.01 ~ "**", 
           p.value < 0.05 ~ "*",
           p.value < 0.1 ~ ".",
           TRUE ~ ""),
         p.value = style_pvalue(p.value, digits = 2)) %>% 
  gt() %>% 
  tab_style(
    style = list(
      cell_fill(color = "lightcyan"),
      cell_text(weight = "bold")
      ),
    locations = cells_body(
      columns = everything(),
      rows = grepl("revised", term)
    )
  ) %>% 
  fmt_number(columns = c(estimate, robust.se), n_sigfig = 2) 
Code
wt_female_cad = readRDS(here::here("results", "metrics_wtd_100_female_cadence.rds"))
make_multivar_tab(wt_female_cad, subt = "Female")

model_steps_sc_cad = 
  df_mortality_win %>% 
  filter(gender == "Female") %>% 
  mutate(
     weight = full_sample_2_year_mec_exam_weight / 2,
                weight_norm = weight / mean(weight)) %>% 
  coxph(Surv(event_time, mortstat) ~ total_scrfsteps + peak1_scrfsteps + 
          age_in_years_at_screening + cat_education + bin_diabetes + cat_bmi + 
          race_hispanic_origin + chf + general_health_condition + chd + 
          heartattack + cancer + stroke + cat_alcohol + cat_smoke + 
          bin_mobilityproblem, weights = weight_norm, data  = .) 

model_steps_sc_cad %>% 
  tidy() %>% 
  select(term, estimate, robust.se, p.value) %>% 
  arrange(p.value) %>% 
  mutate(sig = case_when(
           p.value < 0.001 ~ "***",
           p.value < 0.01 ~ "**", 
           p.value < 0.05 ~ "*",
           p.value < 0.1 ~ ".",
           TRUE ~ ""),
         p.value = style_pvalue(p.value, digits = 2)) %>% 
  gt() %>% 
  tab_style(
    style = list(
      cell_fill(color = "lightcyan"),
      cell_text(weight = "bold")
      ),
    locations = cells_body(
      columns = everything(),
      rows = grepl("scrf", term)
    )
  ) %>% 
  fmt_number(columns = c(estimate, robust.se), n_sigfig = 2) 

model_steps_sc_cad = 
  df_mortality_win %>% 
  filter(gender == "Female") %>% 
  mutate(
     weight = full_sample_2_year_mec_exam_weight / 2,
                weight_norm = weight / mean(weight)) %>% 
  coxph(Surv(event_time, mortstat) ~ total_scrfsteps + peak30_scrfsteps + 
          age_in_years_at_screening + cat_education + bin_diabetes + cat_bmi + 
          race_hispanic_origin + chf + general_health_condition + chd + 
          heartattack + cancer + stroke + cat_alcohol + cat_smoke + 
          bin_mobilityproblem, weights = weight_norm, data  = .) 


model_steps_sc_cad %>% 
  tidy() %>% 
  select(term, estimate, robust.se, p.value) %>% 
  arrange(p.value) %>% 
  mutate(sig = case_when(
           p.value < 0.001 ~ "***",
           p.value < 0.01 ~ "**", 
           p.value < 0.05 ~ "*",
           p.value < 0.1 ~ ".",
           TRUE ~ ""),
         p.value = style_pvalue(p.value, digits = 2)) %>% 
  gt() %>% 
  tab_style(
    style = list(
      cell_fill(color = "lightcyan"),
      cell_text(weight = "bold")
      ),
    locations = cells_body(
      columns = everything(),
      rows = grepl("scrf", term)
    )
  ) %>% 
  fmt_number(columns = c(estimate, robust.se), n_sigfig = 2) 

Sensivity analysis

Use anyone with at least one valid day, compute results

Single variable

Code
sens_single = readRDS(here::here("results", "metrics_wtd_100_singlevar_sens.rds"))  %>% 
  filter(!grepl("peak", variable))


df_mortality_sens =
  df_all %>%
  filter(num_valid_days > 0) %>%
  filter(age_in_years_at_screening >= 50 &
           age_in_years_at_screening < 80)  %>% 
  filter(if_all(.cols = c(age_in_years_at_screening, gender,
                          race_hispanic_origin, cat_education,
                          cat_bmi, chd, chf, heartattack, stroke, cancer,
                          bin_diabetes, cat_alcohol, cat_smoke, bin_mobilityproblem,
                          general_health_condition, mortstat, permth_exm, total_PAXMTSM),
                ~!is.na(.x))) %>%
  mutate(event_time = permth_exm / 12) %>% 
  ungroup() %>%
  mutate(across(c(contains("total"), contains("peak")), ~DescTools::Winsorize(.x, probs = c(0, 0.99)))) 

data_summary = sens_single %>%
  group_by(variable) %>%
  mutate(ind = row_number()) %>%
  summarize(across(concordance, list(
    mean = ~ mean(.x), se = ~ sd(.x) / sqrt(n())
  ))) %>%
  mutate(variable_fac = factor(
    variable
    # labels = vlabs
  ))

df_means =
  df_mortality_sens %>%
  group_by(mortstat) %>%
  summarize(across(c(contains("total"), contains("peak")), ~ mean(.x))) %>%
  pivot_longer(cols = -mortstat) %>%
  pivot_wider(names_from = mortstat,
              values_from = value,
              names_prefix = 'died_') %>%
  rename(variable = name)

# get the best variable based on concodance
best_var = data_summary %>%
  arrange(desc(concordance_mean)) %>%
  slice(1) %>%
  pull(variable)

best_var_vec = sens_single %>%
  filter(variable == best_var) %>%
  pull(concordance)

t_tests = sens_single %>%
  group_by(variable) %>%
  filter(variable != best_var) %>%
  nest() %>%
  mutate(t_test = map(
    data,
    ~ t.test(
      .x$concordance,
      best_var_vec,
      var.eq = FALSE,
      paired = FALSE,
      alternative = "less"
    )
  ),
  res = map(t_test, tidy)) %>%
  unnest(res) %>%
  ungroup() %>%
  select(variable, p.value)

# join t tests with summary data and make table
data_summary %>%
  left_join(t_tests) %>%
  left_join(df_means) %>%
  arrange(desc(concordance_mean)) %>%
  mutate(
    variable_fac = forcats::fct_reorder(variable_fac, concordance_mean),
    lb = concordance_mean - 1.96 * concordance_se,
    ub = concordance_mean + 1.96 * concordance_se,
    ci = paste0(
      sprintf("%0.3f", round(concordance_mean, 3)),
      " (",
      sprintf("%0.3f", round(lb, 3)),
      ", ",
      sprintf("%0.3f", round(ub, 3)),
      ")"
    )
  ) %>%
  select(variable_fac,
         ci,
         p.value,
         total_alive = died_0,
         total_deceased = died_1) %>%
  mutate(pvalue = style_pvalue(p.value, digits = 3)) %>%
  mutate(pvalue = ifelse(is.na(pvalue), "---", pvalue),
         across(starts_with("mean"), ~ if_else(is.na(.x), "---", as.character(round(
           .x, 0
         ))))) %>%
  rename(
    Variable = variable_fac,
    "Mean (95% CI)" = ci,
    "p-value" = pvalue,
    "Mean value among alive" = total_alive,
    "Mean value among deceased" = total_deceased
  ) %>%
  gt::gt() %>%
  gt::fmt_number(columns = starts_with("mean"), decimals = 0) %>%
  gt::tab_style(
    style = list(cell_text(weight = "bold")),
    locations = cells_body(columns = everything(), rows = p.value > 0.05 |
                             is.na(p.value))
  ) %>%
  gt::cols_hide(p.value) %>%
  tab_header(title = "100x 10-fold Cross-Validated Single Variable Concordance") %>%
  tab_footnote(footnote = "p-value for one sided t-test comparing variable to variable with higest mean concordance",
               locations = cells_column_labels(columns = "p-value")) %>%
  cols_align(columns = everything(), align = "left")
100x 10-fold Cross-Validated Single Variable Concordance
Variable Mean (95% CI) Mean value among alive Mean value among deceased p-value1
total_scrfsteps 0.732 (0.728, 0.736) 10,591 6,502 ---
total_adeptsteps 0.724 (0.720, 0.728) 2,391 1,380 0.002
total_oaksteps 0.723 (0.720, 0.727) 10,845 7,067 <0.001
total_vsrevsteps 0.719 (0.715, 0.723) 8,021 4,878 <0.001
total_vssteps 0.716 (0.712, 0.720) 8,885 5,885 <0.001
total_actisteps 0.711 (0.707, 0.714) 11,790 8,740 <0.001
total_scsslsteps 0.703 (0.699, 0.707) 8,786 5,784 <0.001
total_AC 0.689 (0.685, 0.692) 2,454,243 1,900,197 <0.001
total_PAXMTSM 0.683 (0.680, 0.687) 13,002 10,438 <0.001
bin_mobilityproblem 0.675 (0.672, 0.678) NA NA <0.001
age_in_years_at_screening 0.672 (0.668, 0.675) NA NA <0.001
general_health_condition 0.664 (0.661, 0.668) NA NA <0.001
total_log10PAXMTSM 0.650 (0.646, 0.654) 987 884 <0.001
total_log10AC 0.633 (0.629, 0.637) 2,939 2,708 <0.001
bin_diabetes 0.591 (0.588, 0.594) NA NA <0.001
cat_smoke 0.587 (0.584, 0.591) NA NA <0.001
cat_education 0.574 (0.571, 0.577) NA NA <0.001
chd 0.551 (0.549, 0.553) NA NA <0.001
cat_alcohol 0.550 (0.546, 0.554) NA NA <0.001
chf 0.549 (0.547, 0.551) NA NA <0.001
gender 0.546 (0.542, 0.549) NA NA <0.001
heartattack 0.546 (0.544, 0.547) NA NA <0.001
cancer 0.538 (0.536, 0.541) NA NA <0.001
stroke 0.532 (0.530, 0.533) NA NA <0.001
race_hispanic_origin 0.526 (0.525, 0.528) NA NA <0.001
cat_bmi 0.519 (0.515, 0.522) NA NA <0.001
1 p-value for one sided t-test comparing variable to variable with higest mean concordance
Code
sens_male_single = readRDS(here::here("results", "metrics_wtd_100_singlevar_male_sens.rds"))  %>% 
  filter(!grepl("peak", variable))
sens_female_single = readRDS(here::here("results", "metrics_wtd_100_singlevar_female_sens.rds")) %>% 
  filter(!grepl("peak", variable))

df_mortality_sens =
  df_all %>%
  filter(num_valid_days > 0) %>%
  filter(age_in_years_at_screening >= 50 &
           age_in_years_at_screening < 80)  %>% 
  filter(if_all(.cols = c(age_in_years_at_screening, gender,
                          race_hispanic_origin, cat_education,
                          cat_bmi, chd, chf, heartattack, stroke, cancer,
                          bin_diabetes, cat_alcohol, cat_smoke, bin_mobilityproblem,
                          general_health_condition, mortstat, permth_exm, total_PAXMTSM),
                ~!is.na(.x))) %>%
  mutate(event_time = permth_exm / 12) %>% 
  ungroup() %>%
  mutate(across(c(contains("total"), contains("peak")), ~DescTools::Winsorize(.x, probs = c(0, 0.99)))) 

data_summary = sens_male_single %>%
  group_by(variable) %>%
  mutate(ind = row_number()) %>%
  summarize(across(concordance, list(
    mean = ~ mean(.x), se = ~ sd(.x) / sqrt(n())
  ))) %>%
  mutate(variable_fac = factor(
    variable
    # labels = vlabs
  ))

df_means =
  df_mortality_sens %>%
  filter(gender == "Male") %>%
  group_by(mortstat) %>%
  summarize(across(c(contains("total"), contains("peak")), ~ mean(.x))) %>%
  pivot_longer(cols = -mortstat) %>%
  pivot_wider(names_from = mortstat,
              values_from = value,
              names_prefix = 'died_') %>%
  rename(variable = name)

# get the best variable based on concodance
best_var = data_summary %>%
  arrange(desc(concordance_mean)) %>%
  slice(1) %>%
  pull(variable)

best_var_vec = sens_male_single %>%
  filter(variable == best_var) %>%
  pull(concordance)

t_tests = sens_male_single %>%
  group_by(variable) %>%
  filter(variable != best_var) %>%
  nest() %>%
  mutate(t_test = map(
    data,
    ~ t.test(
      .x$concordance,
      best_var_vec,
      var.eq = FALSE,
      paired = FALSE,
      alternative = "less"
    )
  ),
  res = map(t_test, tidy)) %>%
  unnest(res) %>%
  ungroup() %>%
  select(variable, p.value)

# join t tests with summary data and make table
data_summary %>%
  left_join(t_tests) %>%
  left_join(df_means) %>%
  arrange(desc(concordance_mean)) %>%
  mutate(
    variable_fac = forcats::fct_reorder(variable_fac, concordance_mean),
    lb = concordance_mean - 1.96 * concordance_se,
    ub = concordance_mean + 1.96 * concordance_se,
    ci = paste0(
      sprintf("%0.3f", round(concordance_mean, 3)),
      " (",
      sprintf("%0.3f", round(lb, 3)),
      ", ",
      sprintf("%0.3f", round(ub, 3)),
      ")"
    )
  ) %>%
  select(variable_fac,
         ci,
         p.value,
         total_alive = died_0,
         total_deceased = died_1) %>%
  mutate(pvalue = style_pvalue(p.value, digits = 3)) %>%
  mutate(pvalue = ifelse(is.na(pvalue), "---", pvalue),
         across(starts_with("mean"), ~ if_else(is.na(.x), "---", as.character(round(
           .x, 0
         ))))) %>%
  rename(
    Variable = variable_fac,
    "Mean (95% CI)" = ci,
    "p-value" = pvalue,
    "Mean value among alive" = total_alive,
    "Mean value among deceased" = total_deceased
  ) %>%
  gt::gt() %>%
  gt::fmt_number(columns = starts_with("mean"), decimals = 0) %>%
  gt::tab_style(
    style = list(cell_text(weight = "bold")),
    locations = cells_body(columns = everything(), rows = p.value > 0.05 |
                             is.na(p.value))
  ) %>%
  gt::cols_hide(p.value) %>%
  tab_header(title = "100x 10-fold Cross-Validated Single Variable Concordance", subtitle = "Male") %>%
  tab_footnote(footnote = "p-value for one sided t-test comparing variable to variable with higest mean concordance",
               locations = cells_column_labels(columns = "p-value")) %>%
  cols_align(columns = everything(), align = "left")




data_summary = sens_female_single %>%
  group_by(variable) %>%
  summarize(across(concordance, list(
    mean = ~ mean(.x), se = ~ sd(.x) / sqrt(n())
  ))) %>%
  mutate(variable_fac = factor(
    variable
    # labels = vlabs
    ))

df_means =
  df_mortality_sens %>%
  filter(gender == "Female") %>%
  group_by(mortstat) %>%
  summarize(across(c(contains("total"), contains("peak")), ~ mean(.x))) %>%
  pivot_longer(cols = -mortstat) %>%
  pivot_wider(names_from = mortstat,
              values_from = value,
              names_prefix = 'died_') %>%
  rename(variable = name)

# get the best variable based on concodance
best_var = data_summary %>%
  arrange(desc(concordance_mean)) %>%
  slice(1) %>%
  pull(variable)

best_var_vec = sens_female_single %>%
  filter(variable == best_var) %>%
  pull(concordance)

t_tests = sens_female_single%>%
  group_by(variable) %>%
  filter(variable != best_var) %>%
  nest() %>%
  mutate(t_test = map(
    data,
    ~ t.test(
      .x$concordance,
      best_var_vec,
      var.eq = FALSE,
      paired = FALSE,
      alternative = "less"
    )
  ),
  res = map(t_test, tidy)) %>%
  unnest(res) %>%
  ungroup() %>%
  select(variable, p.value)

# join t tests with summary data and make table
data_summary %>%
  left_join(t_tests) %>%
  left_join(df_means) %>%
  arrange(desc(concordance_mean)) %>%
  mutate(
    variable_fac = forcats::fct_reorder(variable_fac, concordance_mean),
    lb = concordance_mean - 1.96 * concordance_se,
    ub = concordance_mean + 1.96 * concordance_se,
    ci = paste0(
      sprintf("%0.3f", round(concordance_mean, 3)),
      " (",
      sprintf("%0.3f", round(lb, 3)),
      ", ",
      sprintf("%0.3f", round(ub, 3)),
      ")"
    )
  ) %>%
  select(variable_fac,
         ci,
         p.value,
         total_alive = died_0,
         total_deceased = died_1) %>%
  mutate(pvalue = style_pvalue(p.value, digits = 3)) %>%
  mutate(pvalue = ifelse(is.na(pvalue), "---", pvalue),
         across(starts_with("mean"), ~ if_else(is.na(.x), "---", as.character(round(
           .x, 0
         ))))) %>%
  rename(
    Variable = variable_fac,
    "Mean (95% CI)" = ci,
    "p-value" = pvalue,
    "Mean value among alive" = total_alive,
    "Mean value among deceased" = total_deceased
  ) %>%
  gt::gt() %>%
  gt::fmt_number(columns = starts_with("mean"), decimals = 0) %>%
  gt::tab_style(
    style = list(cell_text(weight = "bold")),
    locations = cells_body(columns = everything(), rows = p.value > 0.05 |
                             is.na(p.value))
  ) %>%
  gt::cols_hide(p.value) %>%
  tab_header(title = "100x 10-fold Cross-Validated Single Variable Concordance", subtitle = "Female") %>%
  tab_footnote(footnote = "p-value for one sided t-test comparing variable to variable with higest mean concordance",
               locations = cells_column_labels(columns = "p-value")) %>%
  cols_align(columns = everything(), align = "left")

Multivariable

Code
sens_all = readRDS(here::here("results", "metrics_wtd_100_sens.rds"))
wt_small = 
  sens_all %>% 
  filter(model %in% c("Demo only", "scrfsteps+PAXMTSM", "scrfsteps", "PAXMTSM"))

summary = 
  wt_small %>% 
  group_by(model) %>% 
  summarize(across(concordance, list(mean = ~mean(.x),
                                       se = ~sd(.x)/sqrt(n())))) %>% 
  mutate(model_fac = factor(model, labels = c("Demographics only",
                                              "Demographics + MIMS", 
                                              "Demographics + Stepcount RF",
                                              "Demographics + Stepcount RF + MIMS")))

coef_scrf = 
  df_mortality_sens %>% 
  mutate(
     weight = full_sample_2_year_mec_exam_weight / 2,
                weight_norm = weight / mean(weight)) %>% 
  coxph(Surv(event_time, mortstat) ~ total_scrfsteps + gender + 
          age_in_years_at_screening + cat_education + bin_diabetes + cat_bmi + 
          race_hispanic_origin + chf + general_health_condition + chd + 
          heartattack + cancer + stroke + cat_alcohol + cat_smoke + 
          bin_mobilityproblem, weights = weight_norm, data  = .) %>% 
  tidy() %>% 
  select(term, estimate, std.error, robust.se, p.value) %>% 
  filter(grepl("steps", term)) %>% 
  mutate(hr = exp(estimate * 500), 
         lb = exp((estimate - 1.96 * std.error) * 500),
         ub = exp((estimate + 1.96 * std.error) * 500),
         model = "scrfsteps")
  
coef_mims = 
  df_mortality_sens %>% 
  mutate(
     weight = full_sample_2_year_mec_exam_weight / 2,
                weight_norm = weight / mean(weight)) %>% 
  coxph(Surv(event_time, mortstat) ~ total_PAXMTSM + total_scrfsteps + gender + 
          age_in_years_at_screening + cat_education + bin_diabetes + cat_bmi + 
          race_hispanic_origin + chf + general_health_condition + chd + 
          heartattack + cancer + stroke + cat_alcohol + cat_smoke + 
          bin_mobilityproblem, weights = weight_norm, data  = .) %>% 
  tidy() %>% 
  select(term, estimate, std.error, robust.se, p.value) %>% 
  filter(grepl("steps", term)) %>% 
  mutate(hr = exp(estimate * 500), 
         lb = exp((estimate - 1.96 * std.error) * 500),
         ub = exp((estimate + 1.96 * std.error) * 500),
         model = "scrfsteps+PAXMTSM")
  
coefs = bind_rows(coef_scrf, coef_mims)
summary %>% 
  left_join(coefs)  %>% 
  mutate(conc_lb = concordance_mean - 1.96 * concordance_se,
         conc_ub = concordance_mean + 1.96 * concordance_se,
         conc_ci = paste0(sprintf("%0.3f",round(concordance_mean, 3)),
                       " (", sprintf("%0.3f",round(conc_lb, 3)), ", ", sprintf("%0.3f",round(conc_ub, 3)), ")"),
         hr_ci = case_when(!is.na(hr) ~ paste0(sprintf("%0.3f",round(hr, 3)),
                       " (", sprintf("%0.3f",round(lb, 3)), ", ", sprintf("%0.3f",round(ub, 3)), ")"),
                       TRUE ~ "---"),
         p.value = format.pval(p.value, digits = 3)) %>% 
  select(model_fac, hr_ci,p.value, conc_ci) %>% 
  rename(Model = model_fac,
         "Steps HR" = hr_ci,
         "Steps p-value" = p.value,
         "Model concordance" = conc_ci) %>% 
  gt::gt() %>%
    # gt::fmt_number(columns = starts_with("mean"), decimals = 0) %>% 
    tab_header(title = "Multivariable Model Comparison") %>%
    cols_align(columns = everything(), align = "left")
Multivariable Model Comparison
Model Steps HR Steps p-value Model concordance
Demographics only --- NA 0.768 (0.765, 0.771)
Demographics + MIMS --- NA 0.773 (0.770, 0.776)
Demographics + Stepcount RF 0.952 (0.937, 0.967) 1.94e-05 0.776 (0.773, 0.779)
Demographics + Stepcount RF + MIMS 0.943 (0.920, 0.966) 0.00218 0.775 (0.771, 0.778)
Code
tb = 
  summary %>% 
  left_join(coefs)  %>% 
  mutate(conc_lb = concordance_mean - 1.96 * concordance_se,
         conc_ub = concordance_mean + 1.96 * concordance_se,
         conc_ci = paste0(sprintf("%0.3f",round(concordance_mean, 3)),
                       " (", sprintf("%0.3f",round(conc_lb, 3)), ", ", sprintf("%0.3f",round(conc_ub, 3)), ")"),
         hr_ci = case_when(!is.na(hr) ~ paste0(sprintf("%0.3f",round(hr, 3)),
                       " (", sprintf("%0.3f",round(lb, 3)), ", ", sprintf("%0.3f",round(ub, 3)), ")"),
                       TRUE ~ "---"),
         p.value = format.pval(p.value, digits = 3)) %>% 
  select(model_fac, hr_ci,p.value, conc_ci) %>% 
  rename(Model = model_fac,
         "Steps HR" = hr_ci,
         "Steps p-value" = p.value,
         "Model concordance" = conc_ci) %>% 
  gt::gt() %>%
    # gt::fmt_number(columns = starts_with("mean"), decimals = 0) %>% 
    tab_header(title = "Multivariable Model Comparison",
               subtitle = "Female") %>%
    cols_align(columns = everything(), align = "left") %>% 
  data.frame() %>% 
  kableExtra::kbl(format = "latex")
Code
sens_male = readRDS(here::here("results", "metrics_wtd_100_male_sens.rds"))  
sens_female = readRDS(here::here("results", "metrics_wtd_100_female_sens.rds"))  

sens_male %>%
  group_by(model) %>%
  summarize(across(concordance, list(
    mean = ~ mean(.x), se = ~ sd(.x) / sqrt(n())
  ))) %>%
  arrange(desc(concordance_mean))

sens_female %>%
  group_by(model) %>%
  summarize(across(concordance, list(
    mean = ~ mean(.x), se = ~ sd(.x) / sqrt(n())
  ))) %>%
  arrange(desc(concordance_mean))